aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/min-max.ll138
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll83
-rw-r--r--llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll8
-rw-r--r--llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll118
-rw-r--r--llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll35
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll32
-rw-r--r--llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll24
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll55
-rw-r--r--llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll52
-rw-r--r--llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll91
-rw-r--r--llvm/test/CodeGen/AArch64/sve-vector-interleave.ll103
-rw-r--r--llvm/test/CodeGen/AArch64/sve-vscale-combine.ll105
-rw-r--r--llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir97
-rw-r--r--llvm/test/CodeGen/AMDGPU/ctpop16.ll330
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll2841
-rw-r--r--llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll98
-rw-r--r--llvm/test/CodeGen/AMDGPU/kernel-args.ll733
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll210
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk16.gfx1250.ll303
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll403
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx1250.ll385
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk16.ll232
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.pk.ll66
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll416
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i16.ll40
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i8.ll1300
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i16.ll79
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i8.ll2595
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i16.ll24
-rw-r--r--llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/memmove-var-size.ll36
-rw-r--r--llvm/test/CodeGen/AMDGPU/min.ll306
-rw-r--r--llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll34
-rw-r--r--llvm/test/CodeGen/AMDGPU/shl.ll59
-rw-r--r--llvm/test/CodeGen/AMDGPU/sra.ll67
-rw-r--r--llvm/test/CodeGen/AMDGPU/udivrem24.ll1984
-rw-r--r--llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll16
-rw-r--r--llvm/test/CodeGen/ARM/cmp-select-sign.ll32
-rw-r--r--llvm/test/CodeGen/ARM/nop_concat_vectors.ll8
-rw-r--r--llvm/test/CodeGen/Generic/fp128-exp10-libcall.ll28
-rw-r--r--llvm/test/CodeGen/Generic/fp128-math-libcalls.ll10
-rw-r--r--llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll38
-rw-r--r--llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll24
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll66
-rw-r--r--llvm/test/CodeGen/NVPTX/param-vectorize-device.ll22
-rw-r--r--llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll61
-rw-r--r--llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll347
-rw-r--r--llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll5
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll72
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll33
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll46
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll144
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll10
-rw-r--r--llvm/test/CodeGen/RISCV/zilsd.ll19
-rw-r--r--llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll12
-rw-r--r--llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll12
-rw-r--r--llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll62
-rw-r--r--llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll4
-rw-r--r--llvm/test/CodeGen/SPIRV/logical-struct-access.ll3
-rw-r--r--llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll46
-rw-r--r--llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll54
-rw-r--r--llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll75
-rw-r--r--llvm/test/CodeGen/X86/avx512f-large-stack.ll23
-rw-r--r--llvm/test/CodeGen/X86/cmp.ll182
-rw-r--r--llvm/test/CodeGen/X86/exp10-libcall-names.ll45
-rw-r--r--llvm/test/CodeGen/X86/exp10l-libcall-names.ll46
-rw-r--r--llvm/test/CodeGen/X86/huge-stack.ll72
-rw-r--r--llvm/test/CodeGen/X86/large-displacements-fastisel.ll18
-rw-r--r--llvm/test/CodeGen/X86/large-displacements.ll82
-rw-r--r--llvm/test/CodeGen/X86/merge-huge-sp-updates.ll4
-rw-r--r--llvm/test/CodeGen/X86/stack-clash-extra-huge.ll28
-rw-r--r--llvm/test/CodeGen/X86/stack-clash-huge.ll36
-rw-r--r--llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll2
-rw-r--r--llvm/test/DebugInfo/PDB/obj-globalhash.test24
-rw-r--r--llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll180
-rw-r--r--llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test16
-rw-r--r--llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s8
-rw-r--r--llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll4
-rw-r--r--llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s9
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s399
-rw-r--r--llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s399
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt399
-rw-r--r--llvm/test/MC/ELF/many-instructions.s10
-rw-r--r--llvm/test/MC/ELF/mc-dump.s5
-rw-r--r--llvm/test/Object/archive-darwin-duplicates.test10
-rw-r--r--llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td8
-rw-r--r--llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td32
-rw-r--r--llvm/test/TableGen/GlobalISelEmitter/HwModes.td4
-rw-r--r--llvm/test/Transforms/GVN/PRE/pre-after-rle.ll146
-rw-r--r--llvm/test/Transforms/GVN/PRE/rle.ll2188
-rw-r--r--llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll24
-rw-r--r--llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll182
-rw-r--r--llvm/test/Transforms/InstCombine/gepofconstgepi8.ll180
-rw-r--r--llvm/test/Transforms/InstCombine/load-cmp.ll32
-rw-r--r--llvm/test/Transforms/InstCombine/phi.ll28
-rw-r--r--llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll36
-rw-r--r--llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll48
-rw-r--r--llvm/test/Transforms/LICM/gep-reassociate.ll53
-rw-r--r--llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll8
-rw-r--r--llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll155
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll63
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll88
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/f16.ll45
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll59
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll1481
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll729
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll634
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll75
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll60
-rw-r--r--llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll33
-rw-r--r--llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll73
-rw-r--r--llvm/test/Transforms/LoopVectorize/vect.stats.ll73
-rw-r--r--llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll24
-rw-r--r--llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll1103
-rw-r--r--llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll3
-rw-r--r--llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll3
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll1075
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll4
-rw-r--r--llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll2
-rw-r--r--llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll6
-rw-r--r--llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll6
-rw-r--r--llvm/test/lit.cfg.py39
-rw-r--r--llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test7
-rw-r--r--llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test19
-rw-r--r--llvm/test/tools/llvm-dwarfdump/dump_dwo.test7
-rw-r--r--llvm/test/tools/llvm-ir2vec/embeddings.ll14
-rw-r--r--llvm/test/tools/llvm-ir2vec/entities.ll2
-rw-r--r--llvm/test/tools/llvm-ir2vec/error-handling.ll13
-rw-r--r--llvm/test/tools/llvm-ir2vec/triplets.ll2
-rw-r--r--llvm/test/tools/llvm-libtool-darwin/L-and-l.test91
-rw-r--r--llvm/test/tools/llvm-objcopy/COFF/dump-section.test2
-rw-r--r--llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll18
135 files changed, 21629 insertions, 4117 deletions
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index b824f53..a579eb3 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -377,6 +377,144 @@ define void @maximum() {
ret void
}
+define void @minimumnum() { ; Cost-model test: llvm.minimumnum on float/double scalars and fixed-width vectors; call results are unused, only the reported costs are checked.
+; CHECK-LABEL: 'minimumnum'
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call float @llvm.minimumnum.f32(float poison, float poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call double @llvm.minimumnum.f64(double poison, double poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %3 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %4 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %5 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %6 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %7 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %8 = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %9 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 40 for: %10 = call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call float @llvm.minimumnum.f32(float poison, float poison)
+ call double @llvm.minimumnum.f64(double poison, double poison)
+ call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison)
+ call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison)
+ call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison)
+ call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison)
+ call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison)
+ call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison)
+ call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison)
+ call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison)
+ ret void
+}
+
+define void @minimumnum_fp16() { ; Cost-model test: llvm.minimumnum on half and <2/4/8/16 x half>; two expectation sets (NOF16 and F16 prefixes) follow.
+; CHECK-NOF16-LABEL: 'minimumnum_fp16'
+; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %1 = call half @llvm.minimumnum.f16(half poison, half poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-F16-LABEL: 'minimumnum_fp16'
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call half @llvm.minimumnum.f16(half poison, half poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %4 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %5 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call half @llvm.minimumnum.f16(half poison, half poison)
+ call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+ call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+ call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+ call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+ ret void
+}
+
+define void @minimumnum_bf16() { ; Cost-model test: llvm.minimumnum on bfloat and <2/4/8/16 x bfloat>; intrinsic suffixes spell the operand type as printed in the expectations.
+; CHECK-LABEL: 'minimumnum_bf16'
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %1 = call bfloat @llvm.minimumnum.bf16(bfloat poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call bfloat @llvm.minimumnum.bf16(bfloat poison, bfloat poison)
+ call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+ call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+ call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+ call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+ ret void
+}
+
+define void @maximumnum() { ; Cost-model test: llvm.maximumnum on float/double scalars and fixed-width vectors; call results are unused, only the reported costs are checked.
+; CHECK-LABEL: 'maximumnum'
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call float @llvm.maximumnum.f32(float poison, float poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call double @llvm.maximumnum.f64(double poison, double poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %3 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %4 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %5 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %6 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %7 = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %8 = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %9 = call <8 x double> @llvm.maximumnum.v8f64(<8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 40 for: %10 = call <16 x double> @llvm.maximumnum.v16f64(<16 x double> poison, <16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call float @llvm.maximumnum.f32(float poison, float poison)
+ call double @llvm.maximumnum.f64(double poison, double poison)
+ call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison)
+ call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison)
+ call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison)
+ call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison)
+ call <2 x double> @llvm.maximumnum.v2f64(<2 x double> poison, <2 x double> poison)
+ call <4 x double> @llvm.maximumnum.v4f64(<4 x double> poison, <4 x double> poison)
+ call <8 x double> @llvm.maximumnum.v8f64(<8 x double> poison, <8 x double> poison)
+ call <16 x double> @llvm.maximumnum.v16f64(<16 x double> poison, <16 x double> poison)
+ ret void
+}
+
+define void @maximumnum_fp16() { ; Cost-model test: llvm.maximumnum on half and <2/4/8/16 x half>; two expectation sets (NOF16 and F16 prefixes) follow.
+; CHECK-NOF16-LABEL: 'maximumnum_fp16'
+; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %1 = call half @llvm.maximumnum.f16(half poison, half poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-F16-LABEL: 'maximumnum_fp16'
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call half @llvm.maximumnum.f16(half poison, half poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %4 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %5 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call half @llvm.maximumnum.f16(half poison, half poison)
+ call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison)
+ call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison)
+ call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison)
+ call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison)
+ ret void
+}
+
+define void @maximumnum_bf16() { ; Cost-model test: llvm.maximumnum on bfloat and <2/4/8/16 x bfloat>; intrinsic suffixes spell the operand type as printed in the expectations.
+; CHECK-LABEL: 'maximumnum_bf16'
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %1 = call bfloat @llvm.maximumnum.bf16(bfloat poison, bfloat poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call bfloat @llvm.maximumnum.bf16(bfloat poison, bfloat poison)
+ call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+ call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+ call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+ call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+ ret void
+}
+
declare i8 @llvm.umin.i8(i8, i8)
declare i16 @llvm.umin.i16(i16, i16)
declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
index 829ce12..5d11133 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
@@ -255,6 +255,89 @@ define void @maximum() {
ret void
}
+define void @minimumnum() { ; Cost-model test: llvm.minimumnum on scalable (vscale) float/double/half/bfloat vectors; every call is expected to report an Invalid cost.
+; CHECK-LABEL: 'minimumnum'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x float> @llvm.minimumnum.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x float> @llvm.minimumnum.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 4 x double> @llvm.minimumnum.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 8 x double> @llvm.minimumnum.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 16 x double> @llvm.minimumnum.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %10 = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %11 = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %12 = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %13 = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %14 = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %15 = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %16 = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %17 = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x float> @llvm.minimumnum(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ call <vscale x 4 x float> @llvm.minimumnum(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ call <vscale x 8 x float> @llvm.minimumnum(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ call <vscale x 16 x float> @llvm.minimumnum(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+ call <vscale x 2 x double> @llvm.minimumnum(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ call <vscale x 4 x double> @llvm.minimumnum(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ call <vscale x 8 x double> @llvm.minimumnum(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+ call <vscale x 16 x double> @llvm.minimumnum(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+ ret void ; NOTE(review): terminator in mid-body; the half/bfloat calls below appear to start a new unnamed block (expected numbering jumps from %8 to %10) - confirm this split is intended.
+ call <vscale x 2 x half> @llvm.minimumnum(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ call <vscale x 4 x half> @llvm.minimumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ call <vscale x 8 x half> @llvm.minimumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ call <vscale x 16 x half> @llvm.minimumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+ call <vscale x 2 x bfloat> @llvm.minimumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ call <vscale x 4 x bfloat> @llvm.minimumnum(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ call <vscale x 8 x bfloat> @llvm.minimumnum(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ call <vscale x 16 x bfloat> @llvm.minimumnum(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+ ret void
+}
+
+define void @maximumnum() { ; Cost-model test: llvm.maximumnum on scalable (vscale) float/double/half/bfloat vectors; every call is expected to report an Invalid cost.
+; CHECK-LABEL: 'maximumnum'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x float> @llvm.maximumnum.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x float> @llvm.maximumnum.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x float> @llvm.maximumnum.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x float> @llvm.maximumnum.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 4 x double> @llvm.maximumnum.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 8 x double> @llvm.maximumnum.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 16 x double> @llvm.maximumnum.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %10 = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %11 = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %12 = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %13 = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %14 = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %15 = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %16 = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %17 = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x float> @llvm.maximumnum(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ call <vscale x 4 x float> @llvm.maximumnum(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ call <vscale x 8 x float> @llvm.maximumnum(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ call <vscale x 16 x float> @llvm.maximumnum(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+ call <vscale x 2 x double> @llvm.maximumnum(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ call <vscale x 4 x double> @llvm.maximumnum(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ call <vscale x 8 x double> @llvm.maximumnum(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+ call <vscale x 16 x double> @llvm.maximumnum(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+ ret void ; NOTE(review): terminator in mid-body; the half/bfloat calls below appear to start a new unnamed block (expected numbering jumps from %8 to %10) - confirm this split is intended.
+ call <vscale x 2 x half> @llvm.maximumnum(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ call <vscale x 4 x half> @llvm.maximumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ call <vscale x 8 x half> @llvm.maximumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ call <vscale x 16 x half> @llvm.maximumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+ call <vscale x 2 x bfloat> @llvm.maximumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ call <vscale x 4 x bfloat> @llvm.maximumnum(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ call <vscale x 8 x bfloat> @llvm.maximumnum(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ call <vscale x 16 x bfloat> @llvm.maximumnum(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+ ret void
+}
+
+
declare <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
index 1dc8d4a7..207a44d 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll
@@ -505,7 +505,7 @@ e.1:
ret i32 1
}
-define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) {
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree {
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption'
; CHECK-NEXT: loop.header:
; CHECK-NEXT: Memory dependences are safe with run-time checks
@@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
-; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: (Low: %B High: (2000 + %B))
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
; CHECK-NEXT: Group GRP1:
-; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr))
+; CHECK-NEXT: (Low: %A High: (2000 + %A))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
@@ -565,7 +565,7 @@ e.2:
ret void
}
-define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) {
+define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree {
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small'
; CHECK-NEXT: loop.header:
; CHECK-NEXT: Memory dependences are safe with run-time checks
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
index d93d521..d896a1b 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -1,16 +1,38 @@
-; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMDEP
+; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='gvn<memoryssa>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMSSA
target datalayout = "e-p:64:64:64"
; GVN should ignore the store to p1 to see that the load from p is
; fully redundant.
-; CHECK: @yes
-; CHECK: if.then:
-; CHECK-NEXT: store i32 0, ptr %q
-; CHECK-NEXT: ret void
-
define void @yes(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind {
+; CHECK-MEMDEP-LABEL: define void @yes(
+; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK-MEMDEP: [[IF_THEN]]:
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4
+; CHECK-MEMDEP-NEXT: ret void
+; CHECK-MEMDEP: [[IF_ELSE]]:
+; CHECK-MEMDEP-NEXT: ret void
+;
+; CHECK-MEMSSA-LABEL: define void @yes(
+; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK-MEMSSA: [[IF_THEN]]:
+; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4
+; CHECK-MEMSSA-NEXT: ret void
+; CHECK-MEMSSA: [[IF_ELSE]]:
+; CHECK-MEMSSA-NEXT: ret void
+;
entry:
store i32 0, ptr %p, !tbaa !1
store i32 1, ptr %p1, !tbaa !2
@@ -30,16 +52,22 @@ if.else:
; the other type could be unified with the first type, however for now, GVN
; should just be conservative.
-; CHECK: @watch_out_for_type_change
-; CHECK: if.then:
-; CHECK: %t = load i32, ptr %p
-; CHECK: store i32 %t, ptr %q
-; CHECK: ret void
-; CHECK: if.else:
-; CHECK: %u = load i32, ptr %p
-; CHECK: store i32 %u, ptr %q
-
define void @watch_out_for_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind {
+; CHECK-LABEL: define void @watch_out_for_type_change(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: store i32 [[T]], ptr [[Q]], align 4
+; CHECK-NEXT: ret void
+; CHECK: [[IF_ELSE]]:
+; CHECK-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8:![0-9]+]]
+; CHECK-NEXT: store i32 [[U]], ptr [[Q]], align 4
+; CHECK-NEXT: ret void
+;
entry:
store i32 0, ptr %p, !tbaa !1
store i32 1, ptr %p1, !tbaa !2
@@ -59,15 +87,36 @@ if.else:
; As before, but the types are swapped. This time GVN does manage to
; eliminate one of the loads before noticing the type mismatch.
-; CHECK: @watch_out_for_another_type_change
-; CHECK: if.then:
-; CHECK: store i32 0, ptr %q
-; CHECK: ret void
-; CHECK: if.else:
-; CHECK: %u = load i32, ptr %p
-; CHECK: store i32 %u, ptr %q
-
define void @watch_out_for_another_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind {
+; CHECK-MEMDEP-LABEL: define void @watch_out_for_another_type_change(
+; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
+; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]]
+; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK-MEMDEP: [[IF_THEN]]:
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4
+; CHECK-MEMDEP-NEXT: ret void
+; CHECK-MEMDEP: [[IF_ELSE]]:
+; CHECK-MEMDEP-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]]
+; CHECK-MEMDEP-NEXT: store i32 [[U]], ptr [[Q]], align 4
+; CHECK-MEMDEP-NEXT: ret void
+;
+; CHECK-MEMSSA-LABEL: define void @watch_out_for_another_type_change(
+; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
+; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]]
+; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]]
+; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK-MEMSSA: [[IF_THEN]]:
+; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8]]
+; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4
+; CHECK-MEMSSA-NEXT: ret void
+; CHECK-MEMSSA: [[IF_ELSE]]:
+; CHECK-MEMSSA-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]]
+; CHECK-MEMSSA-NEXT: store i32 [[U]], ptr [[Q]], align 4
+; CHECK-MEMSSA-NEXT: ret void
+;
entry:
store i32 0, ptr %p, !tbaa !1
store i32 1, ptr %p1, !tbaa !2
@@ -94,3 +143,26 @@ if.else:
!7 = !{!"outer space", !9}
!8 = !{!"brick red", !5}
!9 = !{!"observable universe"}
+;.
+; CHECK-MEMDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK-MEMDEP: [[META1]] = !{!"red", [[META2:![0-9]+]]}
+; CHECK-MEMDEP: [[META2]] = !{}
+; CHECK-MEMDEP: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK-MEMDEP: [[META4]] = !{!"blu", [[META2]]}
+; CHECK-MEMDEP: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK-MEMDEP: [[META6]] = !{!"outer space", [[META7:![0-9]+]]}
+; CHECK-MEMDEP: [[META7]] = !{!"observable universe"}
+; CHECK-MEMDEP: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK-MEMDEP: [[META9]] = !{!"brick red", [[META1]]}
+;.
+; CHECK-MEMSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK-MEMSSA: [[META1]] = !{!"red", [[META2:![0-9]+]]}
+; CHECK-MEMSSA: [[META2]] = !{}
+; CHECK-MEMSSA: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CHECK-MEMSSA: [[META4]] = !{!"blu", [[META2]]}
+; CHECK-MEMSSA: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK-MEMSSA: [[META6]] = !{!"outer space", [[META7:![0-9]+]]}
+; CHECK-MEMSSA: [[META7]] = !{!"observable universe"}
+; CHECK-MEMSSA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0}
+; CHECK-MEMSSA: [[META9]] = !{!"brick red", [[META1]]}
+;.
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index 10c656a..30b74cb 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -796,6 +796,41 @@ define amdgpu_kernel void @v_permlane32_swap(ptr addrspace(1) %out, i32 %src0, i
ret void
}
+; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2)
+define amdgpu_kernel void @v_permlane_bcast_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+ %result= call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %src1, i32 %src2)
+define amdgpu_kernel void @v_permlane_up_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+ %result= call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2)
+define amdgpu_kernel void @v_permlane_down_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+ %result= call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2)
+define amdgpu_kernel void @v_permlane_xor_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+ %result= call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 %src1)
+define amdgpu_kernel void @v_permlane_idx_gen_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
+ %result= call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 %src1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.dead.i32()
define amdgpu_cs_chain void @dead(ptr addrspace(1) %out) {
%v = call i32 @llvm.amdgcn.dead.i32()
diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 578038b..d9cdac4 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,8 +1,8 @@
; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
@@ -38,24 +38,12 @@ if.end: ; preds = %if.then, %entry
; CHECK-NOT: madd
; CHECK:ldr
-; CHECK-NoAA-LABEL: @test_GEP_CSE(
-; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64
-; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
-; CHECK-NoAA: add i64 [[PTR2]], 23052
-; CHECK-NoAA: inttoptr
-; CHECK-NoAA: if.then:
-; CHECK-NoAA-NOT: ptrtoint
-; CHECK-NoAA-NOT: mul
-; CHECK-NoAA: add i64 [[PTR2]], 23048
-; CHECK-NoAA: inttoptr
-
-; CHECK-UseAA-LABEL: @test_GEP_CSE(
-; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052
-; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048
+; CHECK-IR-LABEL: @test_GEP_CSE(
+; CHECK-IR: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-IR: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]]
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23052
+; CHECK-IR: if.then:
+; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23048
%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
%struct.pt = type { ptr, i32, i32 }
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll b/llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll
new file mode 100644
index 0000000..4a601f1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple arm64ec-windows-msvc -o - %s | FileCheck %s
+
+; Arm64EC Regression Test: The Arm64EC Call Lowering was placing "available
+; externally" items in COMDATs, which is not permitted by the module verifier.
+
+define available_externally float @f() {
+entry:
+ ret float 0x0
+}
+
+define i32 @caller() {
+entry:
+ call float @f()
+ ret i32 0
+}
+
+; Normal function gets an entry thunk, but not an exit thunk.
+; CHECK-DAG: $ientry_thunk$cdecl$i8$v:
+; CHECK-NOT: $iexit_thunk$cdecl$i8$v:
+
+; Available Externally function gets an exit thunk, but not an entry thunk.
+; CHECK-DAG: $iexit_thunk$cdecl$f$v:
+; CHECK-DAG: "#f$exit_thunk":
+; CHECK-NOT: $ientry_thunk$cdecl$f$v:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index 880bd29..d67aa08 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -14,20 +14,19 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cntd x9
; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
+; CHECK-NEXT: mov x11, x9
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: zip2 p2.d, p1.d, p1.d
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: zip1 p1.d, p1.d, p1.d
; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl]
@@ -39,14 +38,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p2/m, z7.d
-; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p2/m, z7.d
+; CHECK-NEXT: mov z0.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x11, x8
; CHECK-NEXT: add x11, x11, x9
; CHECK-NEXT: b.mi .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -111,21 +110,20 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntd x9
-; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: neg x10, x9
+; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT: cmp x10, x8
@@ -141,12 +139,12 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p2/m, z7.d
-; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p2/m, z7.d
+; CHECK-NEXT: mov z0.d, p1/m, z6.d
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -213,21 +211,20 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: mov w8, #100 // =0x64
-; CHECK-NEXT: cntd x9
; CHECK-NEXT: whilelo p1.d, xzr, x8
+; CHECK-NEXT: cntd x9
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: cnth x11
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cnth x11
; CHECK-NEXT: mov x12, x9
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2]
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: add x2, x2, x11
; CHECK-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
@@ -243,14 +240,14 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: mov z0.d, p2/m, z7.d
-; CHECK-NEXT: mov z1.d, p1/m, z6.d
+; CHECK-NEXT: mov z1.d, p2/m, z7.d
+; CHECK-NEXT: mov z0.d, p1/m, z6.d
; CHECK-NEXT: whilelo p1.d, x12, x8
; CHECK-NEXT: add x12, x12, x9
; CHECK-NEXT: b.mi .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 29be231..0646ca4 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -14,15 +14,14 @@ target triple = "aarch64"
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntd x8
-; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: mov w10, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #2
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
@@ -32,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
-; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
-; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -183,17 +182,16 @@ exit.block: ; preds = %vector.body
define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64_unrolled:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: cntw x8
-; CHECK-NEXT: mov w10, #1000 // =0x3e8
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: neg x9, x8
+; CHECK-NEXT: mov w10, #1000 // =0x3e8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and x9, x9, x10
; CHECK-NEXT: rdvl x10, #4
-; CHECK-NEXT: zip2 z0.d, z1.d, z1.d
-; CHECK-NEXT: zip1 z1.d, z1.d, z1.d
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: mov z3.d, z0.d
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
@@ -207,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
; CHECK-NEXT: add x1, x1, x10
-; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
-; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
-; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
; CHECK-NEXT: faddv d0, p0, z1.d
@@ -310,15 +308,15 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-LABEL: reduction_mix:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: cntd x9
-; CHECK-NEXT: mov w11, #100 // =0x64
+; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: neg x10, x9
+; CHECK-NEXT: mov w11, #100 // =0x64
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: and x10, x10, x11
; CHECK-NEXT: rdvl x11, #2
-; CHECK-NEXT: zip2 z0.d, z2.d, z2.d
-; CHECK-NEXT: zip1 z1.d, z2.d, z2.d
; CHECK-NEXT: .LBB3_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr z3, [x0]
@@ -327,13 +325,13 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
; CHECK-NEXT: ld1w { z5.d }, p0/z, [x3, x8, lsl #2]
; CHECK-NEXT: add x8, x8, x9
; CHECK-NEXT: cmp x10, x8
-; CHECK-NEXT: fadd z0.d, z4.d, z0.d
-; CHECK-NEXT: fadd z1.d, z3.d, z1.d
+; CHECK-NEXT: fadd z1.d, z4.d, z1.d
+; CHECK-NEXT: fadd z0.d, z3.d, z0.d
; CHECK-NEXT: add z2.d, z5.d, z2.d
; CHECK-NEXT: b.ne .LBB3_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d
-; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z3.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z1.d, z0.d, z1.d
; CHECK-NEXT: uaddv d2, p0, z2.d
; CHECK-NEXT: faddv d0, p0, z3.d
; CHECK-NEXT: faddv d1, p0, z1.d
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index a9618fd..05ecc9e 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -131,18 +131,83 @@ define <4 x i64> @interleave2_v4i64(<2 x i64> %vec0, <2 x i64> %vec1) {
ret <4 x i64> %retval
}
+define <4 x i16> @interleave2_same_const_splat_v4i16() {
+; CHECK-SD-LABEL: interleave2_same_const_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.4h, #3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_same_const_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #3 // =0x3
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: ret
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
+ ret <4 x i16> %retval
+}
+
+define <4 x i16> @interleave2_diff_const_splat_v4i16() {
+; CHECK-SD-LABEL: interleave2_diff_const_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI11_0
+; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI11_0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_diff_const_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #3 // =0x3
+; CHECK-GI-NEXT: mov w9, #4 // =0x4
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 4))
+ ret <4 x i16> %retval
+}
-; Float declarations
-declare <4 x half> @llvm.vector.interleave2.v4f16(<2 x half>, <2 x half>)
-declare <8 x half> @llvm.vector.interleave2.v8f16(<4 x half>, <4 x half>)
-declare <16 x half> @llvm.vector.interleave2.v16f16(<8 x half>, <8 x half>)
-declare <4 x float> @llvm.vector.interleave2.v4f32(<2 x float>, <2 x float>)
-declare <8 x float> @llvm.vector.interleave2.v8f32(<4 x float>, <4 x float>)
-declare <4 x double> @llvm.vector.interleave2.v4f64(<2 x double>, <2 x double>)
-
-; Integer declarations
-declare <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8>, <16 x i8>)
-declare <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16>, <8 x i16>)
-declare <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32>, <4 x i32>)
-declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)
+define <4 x i16> @interleave2_same_nonconst_splat_v4i16(i16 %a) {
+; CHECK-SD-LABEL: interleave2_same_nonconst_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v0.4h, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_same_nonconst_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v0.4h, w0
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: ret
+ %ins = insertelement <2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> <i32 0, i32 0>
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> %splat, <2 x i16> %splat)
+ ret <4 x i16> %retval
+}
+
+define <4 x i16> @interleave2_diff_nonconst_splat_v4i16(i16 %a, i16 %b) {
+; CHECK-SD-LABEL: interleave2_diff_nonconst_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w0
+; CHECK-SD-NEXT: mov v0.h[2], w1
+; CHECK-SD-NEXT: mov v0.h[3], w1
+; CHECK-SD-NEXT: rev32 v1.4h, v0.4h
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_diff_nonconst_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v0.4h, w0
+; CHECK-GI-NEXT: dup v1.4h, w1
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
+ %ins1 = insertelement <2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <2 x i16> %ins1, <2 x i16> poison, <2 x i32> <i32 0, i32 0>
+ %ins2 = insertelement <2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <2 x i16> %ins2, <2 x i16> poison, <2 x i32> <i32 0, i32 0>
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> %splat1, <2 x i16> %splat2)
+ ret <4 x i16> %retval
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
index 52cb2d9..c7fb2db 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -267,7 +267,7 @@ define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscal
; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; SME2-NEXT: zip { z0.h - z3.h }, { z0.h - z3.h }
; SME2-NEXT: ret
- %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
+ %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv32i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
ret <vscale x 32 x i16> %retval
}
@@ -540,30 +540,81 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale
ret <vscale x 4 x i32> %retval
}
-; Float declarations
-declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)
-declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)
-declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float>, <vscale x 2 x float>)
-declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+; CHECK-LABEL: interleave2_same_const_splat_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, #3 // =0x3
+; CHECK-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
+; SVE-LABEL: interleave2_diff_const_splat_nxv4i16:
+; SVE: // %bb.0:
+; SVE-NEXT: mov z0.d, #4 // =0x4
+; SVE-NEXT: mov z1.d, #3 // =0x3
+; SVE-NEXT: zip2 z2.d, z1.d, z0.d
+; SVE-NEXT: zip1 z0.d, z1.d, z0.d
+; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: interleave2_diff_const_splat_nxv4i16:
+; SME2: // %bb.0:
+; SME2-NEXT: mov z0.d, #4 // =0x4
+; SME2-NEXT: mov z1.d, #3 // =0x3
+; SME2-NEXT: zip { z0.d, z1.d }, z1.d, z0.d
+; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+; SME2-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4))
+ ret <vscale x 4 x i16> %retval
+}
-; Integer declarations
-declare <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
+; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, w0
+; CHECK-NEXT: ret
+ %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
+ ret <vscale x 4 x i16> %retval
+}
-; Predicated
-declare <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
-declare <vscale x 16 x i1> @llvm.vector.interleave2.nxv16i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
-declare <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
-declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
-
-; Illegal type size
-declare <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
-declare <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64>, <vscale x 4 x i64>)
-
-declare <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
-declare <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
-declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) {
+; SVE-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $w1 killed $w1 def $x1
+; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
+; SVE-NEXT: mov z0.d, x0
+; SVE-NEXT: mov z1.d, x1
+; SVE-NEXT: zip2 z2.d, z0.d, z1.d
+; SVE-NEXT: zip1 z0.d, z0.d, z1.d
+; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; SME2: // %bb.0:
+; SME2-NEXT: // kill: def $w1 killed $w1 def $x1
+; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
+; SME2-NEXT: mov z0.d, x0
+; SME2-NEXT: mov z1.d, x1
+; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+; SME2-NEXT: ret
+ %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2)
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
+; CHECK-LABEL: interleave4_same_const_splat_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, #3 // =0x3
+; CHECK-NEXT: ret
+ %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 8 x i16> %retval
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll b/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll
index 9306c20..7dcd56c 100644
--- a/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll
@@ -1,14 +1,14 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s |FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve < %s | FileCheck %s
-declare i32 @llvm.vscale.i32()
-declare i64 @llvm.vscale.i64()
+target triple = "aarch64-unknown-linux-gnu"
; Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
define i64 @combine_add_vscale_i64() nounwind {
; CHECK-LABEL: combine_add_vscale_i64:
-; CHECK-NOT: add
-; CHECK-NEXT: cntd x0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x0
+; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%add = add i64 %vscale, %vscale
ret i64 %add
@@ -16,9 +16,10 @@ define i64 @combine_add_vscale_i64() nounwind {
define i32 @combine_add_vscale_i32() nounwind {
; CHECK-LABEL: combine_add_vscale_i32:
-; CHECK-NOT: add
-; CHECK-NEXT: cntd x0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%add = add i32 %vscale, %vscale
ret i32 %add
@@ -28,9 +29,9 @@ define i32 @combine_add_vscale_i32() nounwind {
; In this test, C0 = 1, C1 = 32.
define i64 @combine_mul_vscale_i64() nounwind {
; CHECK-LABEL: combine_mul_vscale_i64:
-; CHECK-NOT: mul
-; CHECK-NEXT: rdvl x0, #2
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x0, #2
+; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%mul = mul i64 %vscale, 32
ret i64 %mul
@@ -38,9 +39,10 @@ define i64 @combine_mul_vscale_i64() nounwind {
define i32 @combine_mul_vscale_i32() nounwind {
; CHECK-LABEL: combine_mul_vscale_i32:
-; CHECK-NOT: mul
-; CHECK-NEXT: rdvl x0, #3
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x0, #3
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%mul = mul i32 %vscale, 48
ret i32 %mul
@@ -49,11 +51,11 @@ define i32 @combine_mul_vscale_i32() nounwind {
; Canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
define i64 @combine_sub_vscale_i64(i64 %in) nounwind {
; CHECK-LABEL: combine_sub_vscale_i64:
-; CHECK-NOT: sub
-; CHECK-NEXT: rdvl x8, #-1
-; CHECK-NEXT: asr x8, x8, #4
-; CHECK-NEXT: add x0, x0, x8
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #-1
+; CHECK-NEXT: asr x8, x8, #4
+; CHECK-NEXT: add x0, x0, x8
+; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%sub = sub i64 %in, %vscale
ret i64 %sub
@@ -61,11 +63,11 @@ define i64 @combine_sub_vscale_i64(i64 %in) nounwind {
define i32 @combine_sub_vscale_i32(i32 %in) nounwind {
; CHECK-LABEL: combine_sub_vscale_i32:
-; CHECK-NOT: sub
-; CHECK-NEXT: rdvl x8, #-1
-; CHECK-NEXT: asr x8, x8, #4
-; CHECK-NEXT: add w0, w0, w8
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #-1
+; CHECK-NEXT: asr x8, x8, #4
+; CHECK-NEXT: add w0, w0, w8
+; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%sub = sub i32 %in, %vscale
ret i32 %sub
@@ -75,12 +77,13 @@ define i32 @combine_sub_vscale_i32(i32 %in) nounwind {
; (sub X, (vscale * C)) to (add X, (vscale * -C))
define i64 @multiple_uses_sub_vscale_i64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: multiple_uses_sub_vscale_i64:
-; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: lsr x8, x8, #4
-; CHECK-NEXT: sub x9, x0, x8
-; CHECK-NEXT: add x8, x1, x8
-; CHECK-NEXT: mul x0, x9, x8
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: lsr x8, x8, #4
+; CHECK-NEXT: sub x9, x0, x8
+; CHECK-NEXT: add x8, x1, x8
+; CHECK-NEXT: mul x0, x9, x8
+; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%sub = sub i64 %x, %vscale
%add = add i64 %y, %vscale
@@ -95,9 +98,9 @@ define i64 @multiple_uses_sub_vscale_i64(i64 %x, i64 %y) nounwind {
; Hence, the immediate for RDVL is #1.
define i64 @combine_shl_vscale_i64() nounwind {
; CHECK-LABEL: combine_shl_vscale_i64:
-; CHECK-NOT: shl
-; CHECK-NEXT: rdvl x0, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x0, #1
+; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%shl = shl i64 %vscale, 4
ret i64 %shl
@@ -105,10 +108,38 @@ define i64 @combine_shl_vscale_i64() nounwind {
define i32 @combine_shl_vscale_i32() nounwind {
; CHECK-LABEL: combine_shl_vscale_i32:
-; CHECK-NOT: shl
-; CHECK-NEXT: rdvl x0, #1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdvl x0, #1
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale.i32()
%shl = shl i32 %vscale, 4
ret i32 %shl
}
+
+define i64 @combine_shl_mul_vscale(i64 %a) nounwind {
+; CHECK-LABEL: combine_shl_mul_vscale:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8
+; CHECK-NEXT: mul x0, x0, x8
+; CHECK-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale.i64()
+ %mul = mul i64 %a, %vscale
+ %shl = shl i64 %mul, 3
+ ret i64 %shl
+}
+
+define i64 @combine_shl_mul_vscale_commuted(i64 %a) nounwind {
+; CHECK-LABEL: combine_shl_mul_vscale_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8
+; CHECK-NEXT: mul x0, x0, x8
+; CHECK-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, %a
+ %shl = shl i64 %mul, 3
+ ret i64 %shl
+}
+
+declare i32 @llvm.vscale.i32()
+declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
new file mode 100644
index 0000000..7336a54
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-lower-sgpr-spills,greedy,si-lower-wwm-copies,virtregrewriter,prologepilog -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: widget
+tracksRegLiveness: true
+frameInfo:
+ adjustsStack: true
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+ - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ ; GCN-LABEL: name: widget
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GCN-NEXT: $exec = S_MOV_B64 -1
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62
+ ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: renamable $agpr0 = COPY killed renamable $vgpr62
+ ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg
+ ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF
+ ; GCN-NEXT: dead renamable $vgpr62 = V_AND_B32_e32 1, killed $vgpr62, implicit $exec
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: liveins: $agpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; GCN-NEXT: liveins: $agpr0, $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $agpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: renamable $vgpr62 = COPY renamable $agpr0
+ ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg
+ ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr62, 1
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.4:
+ ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+ ; GCN-NEXT: $exec = S_MOV_B64 -1
+ ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; GCN-NEXT: SI_RETURN
+ bb.0:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15
+
+ %45:vgpr_32 = IMPLICIT_DEF
+ SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+ %16:vgpr_32 = V_AND_B32_e32 1, %45, implicit $exec
+
+ bb.1:
+ successors: %bb.3, %bb.2
+
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ liveins: $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
+
+ S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.3:
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ $sgpr14 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+ ADJCALLSTACKDOWN 0, 28, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ S_BRANCH %bb.2
+
+ bb.4:
+ SI_RETURN
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 1b9b508..cefcbdd 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -457,27 +457,58 @@ define amdgpu_kernel void @v_ctpop_v4i16(ptr addrspace(1) noalias %out, ptr addr
;
; EG-LABEL: v_ctpop_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 7, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T6.X, 1
+; EG-NEXT: ALU 37, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_64 T8.XY, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT * T0.W, T8.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR * T0.W, T8.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.Y, PV.W,
-; EG-NEXT: AND_INT * T0.W, T0.X, literal.x,
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.X, PV.W,
-; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.X, T5.X,
+; EG-NEXT: AND_INT * T0.W, T8.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR * T0.W, T8.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, T1.W, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <4 x i16>, ptr addrspace(1) %in.gep, align 16
@@ -570,33 +601,94 @@ define amdgpu_kernel void @v_ctpop_v8i16(ptr addrspace(1) noalias %out, ptr addr
;
; EG-LABEL: v_ctpop_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 13, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T8.X, 1
+; EG-NEXT: ALU 73, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T12.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: LSHR * T0.W, T0.Z, literal.x,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: LSHR * T0.W, T12.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT * T0.W, PV.W,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.X, T5.X,
+; EG-NEXT: LSHR * T0.W, T12.Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.Z, PS,
-; EG-NEXT: LSHR * T1.W, T0.X, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T0.X, T8.X,
+; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.Y, PV.W,
+; EG-NEXT: BCNT_INT T0.W, PV.W,
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T0.X, PV.W,
-; EG-NEXT: LSHR * T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV * T0.X, T9.X,
+; EG-NEXT: LSHR * T0.W, T12.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T12.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: LSHR T12.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.X, T4.X,
+; EG-NEXT: MOV * T0.Z, T8.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <8 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <8 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -745,46 +837,174 @@ define amdgpu_kernel void @v_ctpop_v16i16(ptr addrspace(1) noalias %out, ptr add
;
; EG-LABEL: v_ctpop_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 25, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T13.X, 1
+; EG-NEXT: ALU 3, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @8
+; EG-NEXT: ALU 114, @16, KC0[], KC1[]
+; EG-NEXT: ALU 34, @131, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T0.X, 16, #1
-; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T20.XYZW, T0.X, 16, #1
+; EG-NEXT: VTX_READ_128 T21.XYZW, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 5(7.006492e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 13:
-; EG-NEXT: LSHR * T0.W, T12.Z, literal.x,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: LSHR * T0.W, T20.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.W, PV.W,
-; EG-NEXT: AND_INT * T0.W, T12.Z, literal.x,
+; EG-NEXT: BCNT_INT * T0.W, PV.W,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.X, T5.X,
+; EG-NEXT: LSHR * T0.W, T20.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.Y, PS, PV.W,
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T0.X, T8.X,
+; EG-NEXT: LSHR * T0.W, T20.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.Z, PS,
-; EG-NEXT: LSHR T0.W, T0.Z, literal.x,
-; EG-NEXT: LSHR * T1.W, T12.X, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV * T0.X, T9.X,
+; EG-NEXT: LSHR * T0.W, T20.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T0.W, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.Y, PS,
-; EG-NEXT: AND_INT T0.Z, T0.Z, literal.x,
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T0.W, T20.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: BCNT_INT T0.W, PV.W,
-; EG-NEXT: AND_INT * T1.W, T12.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.X, T12.X,
+; EG-NEXT: LSHR * T1.W, T21.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T1.W, T21.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV * T0.X, T13.X,
+; EG-NEXT: LSHR * T1.W, T21.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T1.W, T21.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T20.Y, PS, PV.W,
+; EG-NEXT: MOV T13.X, PV.Y,
+; EG-NEXT: MOV * T0.X, T16.X,
+; EG-NEXT: LSHR * T1.W, T21.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: ALU clause starting at 131:
+; EG-NEXT: MOV * T16.X, T1.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT * T1.W, T21.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV * T0.X, T17.X,
+; EG-NEXT: LSHR * T1.W, T21.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: BCNT_INT T12.X, PS,
-; EG-NEXT: BCNT_INT T0.Z, PV.Z,
-; EG-NEXT: LSHR T1.W, T0.X, literal.x,
-; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T13.X, PS, literal.x,
-; EG-NEXT: BCNT_INT T0.Y, PV.W,
-; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
-; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
-; EG-NEXT: BCNT_INT T0.X, PV.W,
-; EG-NEXT: LSHR * T14.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: AND_INT T1.W, T21.W, literal.x,
+; EG-NEXT: LSHR * T21.X, KC0[2].Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45)
+; EG-NEXT: AND_INT T0.Z, PV.X, literal.x,
+; EG-NEXT: BCNT_INT T1.W, PV.W,
+; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
+; EG-NEXT: -65536(nan), 16(2.242078e-44)
+; EG-NEXT: LSHR T22.X, PS, literal.x,
+; EG-NEXT: OR_INT * T20.W, PV.Z, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T17.X, PV.W,
+; EG-NEXT: MOV * T0.X, T4.X,
+; EG-NEXT: MOV * T0.Z, T8.X,
+; EG-NEXT: MOV T20.X, T12.X,
+; EG-NEXT: MOV * T20.Z, T16.X, BS:VEC_120/SCL_212
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%in.gep = getelementptr <16 x i16>, ptr addrspace(1) %in, i32 %tid
%val = load <16 x i16>, ptr addrspace(1) %in.gep, align 32
@@ -1292,7 +1512,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_4:
-; SI-NEXT: ; implicit-def: $vgpr0
+; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: s_branch .LBB14_2
;
; VI-LABEL: ctpop_i16_in_br:
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
index ceacdf5..cbda062 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -1,45 +1,184 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,SI-DENORM,GCN-DENORM-FASTFMA,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,SI-DENORM,GCN-DENORM-SLOWFMA,SI %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=SI-DENORM-FASTFMA,SI-DENORM-FASTFMA-STRICT %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=SI-DENORM-SLOWFMA %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD,SI-FLUSH,SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=preserve-sign -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=tahiti -denormal-fp-math-f32=ieee -mattr=+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=SI-DENORM-FASTFMA,SI-DENORM-FASTFMA-CONTRACT %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=verde -denormal-fp-math-f32=ieee -mattr=-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=SI-DENORM-SLOWFMA %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FLUSH,GFX9-FLUSH-MAD %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DENORM,GFX9-DENORM-FASTFMA-MAD %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-MAD %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx900 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM-FASTFMA %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-FLUSH,GFX9-FLUSH-FMAC %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-DENORM,GFX9-DENORM-FASTFMA-FMAC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-FMAC %s
-
-; FIXME: Should probably test this, but sometimes selecting fmac is painful to match.
-; XUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx906 -denormal-fp-math-f32=ieee -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM-FASTFMA %s
-
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-FMAC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX10-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mcpu=gfx1030 -denormal-fp-math-f32=ieee -mattr=+mad-mac-f32-insts -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GFX10,GFX10-DENORM %s
; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.
target triple = "amdgcn--"
-
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare half @llvm.fmuladd.f16(half, half, half) #1
declare float @llvm.fabs.f32(float) #1
-; GCN-LABEL: {{^}}fmuladd_f32:
-; GCN-FLUSH-MAD: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-; GCN-FLUSH-FMAC: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-
-; GCN-DENORM-FASTFMA: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-
-; GCN-DENORM-SLOWFMA: v_mul_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+; SI-FLUSH-LABEL: fmuladd_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s11, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s10, -1
+; SI-FLUSH-NEXT: s_mov_b32 s14, s10
+; SI-FLUSH-NEXT: s_mov_b32 s15, s11
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b32 s12, s2
+; SI-FLUSH-NEXT: s_mov_b32 s13, s3
+; SI-FLUSH-NEXT: s_mov_b32 s16, s4
+; SI-FLUSH-NEXT: s_mov_b32 s17, s5
+; SI-FLUSH-NEXT: s_mov_b32 s18, s10
+; SI-FLUSH-NEXT: s_mov_b32 s19, s11
+; SI-FLUSH-NEXT: s_mov_b32 s4, s6
+; SI-FLUSH-NEXT: s_mov_b32 s5, s7
+; SI-FLUSH-NEXT: s_mov_b32 s6, s10
+; SI-FLUSH-NEXT: s_mov_b32 s7, s11
+; SI-FLUSH-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-FLUSH-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-FLUSH-NEXT: buffer_load_dword v2, off, s[4:7], 0
+; SI-FLUSH-NEXT: s_mov_b32 s8, s0
+; SI-FLUSH-NEXT: s_mov_b32 s9, s1
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
+; SI-FLUSH-NEXT: buffer_store_dword v2, off, s[8:11], 0
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, off, s[4:7], 0
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v0, v0, v1, v2
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, off, s[4:7], 0
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(1)
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[10:11]
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[12:13]
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v3, v0, s[14:15]
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v3, v1, v2
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmuladd_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[10:11]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[12:13]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v3, v0, s[14:15]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, v2, v3
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[10:11]
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[12:13]
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v3, v0, s[14:15]
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[10:11]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[12:13]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v3, v0, s[14:15]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: global_load_dword v2, v0, s[4:5]
+; GFX10-NEXT: global_load_dword v3, v0, s[6:7]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v3, v1, v2
+; GFX10-NEXT: global_store_dword v0, v3, s[0:1]
+; GFX10-NEXT: s_endpgm
%r0 = load float, ptr addrspace(1) %in1
%r1 = load float, ptr addrspace(1) %in2
%r2 = load float, ptr addrspace(1) %in3
@@ -48,18 +187,190 @@ define amdgpu_kernel void @fmuladd_f32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-; GCN-LABEL: {{^}}fmul_fadd_f32:
-; GCN-FLUSH: v_mac_f32
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32
-
-; GCN-DENORM-STRICT: v_mul_f32_e32
-; GCN-DENORM-STRICT: v_add_f32_e32
-define amdgpu_kernel void @fmul_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+define amdgpu_kernel void @fmul_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+; SI-FLUSH-LABEL: fmul_fadd_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s11, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s10, -1
+; SI-FLUSH-NEXT: s_mov_b32 s14, s10
+; SI-FLUSH-NEXT: s_mov_b32 s15, s11
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b32 s12, s2
+; SI-FLUSH-NEXT: s_mov_b32 s13, s3
+; SI-FLUSH-NEXT: s_mov_b32 s16, s4
+; SI-FLUSH-NEXT: s_mov_b32 s17, s5
+; SI-FLUSH-NEXT: s_mov_b32 s18, s10
+; SI-FLUSH-NEXT: s_mov_b32 s19, s11
+; SI-FLUSH-NEXT: s_mov_b32 s4, s6
+; SI-FLUSH-NEXT: s_mov_b32 s5, s7
+; SI-FLUSH-NEXT: s_mov_b32 s6, s10
+; SI-FLUSH-NEXT: s_mov_b32 s7, s11
+; SI-FLUSH-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b32 s8, s0
+; SI-FLUSH-NEXT: s_mov_b32 s9, s1
+; SI-FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
+; SI-FLUSH-NEXT: buffer_store_dword v2, off, s[8:11], 0
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: fmul_fadd_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmul_fadd_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: fmul_fadd_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: fmul_fadd_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[10:11] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[12:13] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[14:15] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mac_f32_e32 v3, v1, v2
+; GFX9-FLUSH-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: fmul_fadd_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[10:11] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[12:13] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[14:15] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: fmul_fadd_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[4:5] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[6:7] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mac_f32_e32 v3, v1, v2
+; GFX10-FLUSH-NEXT: global_store_dword v0, v3, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: fmul_fadd_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[4:5] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[6:7] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v3
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%r0 = load volatile float, ptr addrspace(1) %in1
%r1 = load volatile float, ptr addrspace(1) %in2
%r2 = load volatile float, ptr addrspace(1) %in3
@@ -69,15 +380,172 @@ define amdgpu_kernel void @fmul_fadd_f32(ptr addrspace(1) %out, ptr addrspace(1)
ret void
}
-; GCN-LABEL: {{^}}fmul_fadd_contract_f32:
-; GCN-FLUSH-FMAC: v_fmac_f32_e32
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32
-
-; GCN-DENORM-FASTFMA: v_fma_f32
-define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
- ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %in3) #0 {
+; SI-FLUSH-LABEL: fmul_fadd_contract_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s11, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s10, -1
+; SI-FLUSH-NEXT: s_mov_b32 s14, s10
+; SI-FLUSH-NEXT: s_mov_b32 s15, s11
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b32 s12, s2
+; SI-FLUSH-NEXT: s_mov_b32 s13, s3
+; SI-FLUSH-NEXT: s_mov_b32 s16, s4
+; SI-FLUSH-NEXT: s_mov_b32 s17, s5
+; SI-FLUSH-NEXT: s_mov_b32 s18, s10
+; SI-FLUSH-NEXT: s_mov_b32 s19, s11
+; SI-FLUSH-NEXT: s_mov_b32 s4, s6
+; SI-FLUSH-NEXT: s_mov_b32 s5, s7
+; SI-FLUSH-NEXT: s_mov_b32 s6, s10
+; SI-FLUSH-NEXT: s_mov_b32 s7, s11
+; SI-FLUSH-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b32 s8, s0
+; SI-FLUSH-NEXT: s_mov_b32 s9, s1
+; SI-FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
+; SI-FLUSH-NEXT: buffer_store_dword v2, off, s[8:11], 0
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmul_fadd_contract_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v0, v0, v1, v2
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmul_fadd_contract_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s11, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s10, -1
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s14, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s15, s11
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s12, s2
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s13, s3
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s16, s4
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s17, s5
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s18, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s19, s11
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s4, s6
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s5, s7
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, s10
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, s11
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s8, s0
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s9, s1
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v0, v0, v1
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmul_fadd_contract_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[10:11] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[12:13] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v3, v0, s[14:15] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v3, v1, v2
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmul_fadd_contract_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[10:11] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[12:13] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v3, v0, s[14:15] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, v2, v3
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmul_fadd_contract_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmul_fadd_contract_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[10:11] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[12:13] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v3, v0, s[14:15] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v3, v1, v2
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v3, s[8:9]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmul_fadd_contract_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[4:5] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v3, v1, v2
+; GFX10-NEXT: global_store_dword v0, v3, s[0:1]
+; GFX10-NEXT: s_endpgm
%r0 = load volatile float, ptr addrspace(1) %in1
%r1 = load volatile float, ptr addrspace(1) %in2
%r2 = load volatile float, ptr addrspace(1) %in3
@@ -87,23 +555,120 @@ define amdgpu_kernel void @fmul_fadd_contract_f32(ptr addrspace(1) %out, ptr add
ret void
}
-; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-
-; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
-; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fmuladd_2.0_a_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, 2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_2.0_a_b_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v2, v2, 2.0, v3
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_2.0_a_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_2.0_a_b_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmuladd_2.0_a_b_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, 2.0, v2
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_a_b_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_2.0_a_b_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_2.0_a_b_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX10-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -117,24 +682,120 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; GCN-LABEL: {{^}}fmuladd_a_2.0_b_f32
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-
-; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
-; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
-
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fmuladd_a_2.0_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, 2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_a_2.0_b_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v2, v2, 2.0, v3
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_a_2.0_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_a_2.0_b_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmuladd_a_2.0_b_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, 2.0, v2
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_a_2.0_b_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_a_2.0_b_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_a_2.0_b_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX10-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -148,28 +809,126 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; GCN-LABEL: {{^}}fadd_a_a_b_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-
-; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
-
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fadd_a_a_b_f32(ptr addrspace(1) %out,
- ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @fadd_a_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+; SI-FLUSH-LABEL: fadd_a_a_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, 2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: fadd_a_a_b_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fadd_a_a_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: fadd_a_a_b_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, 2.0, v3
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: fadd_a_a_b_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: fadd_a_a_b_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: fadd_a_a_b_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX10-FLUSH-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: fadd_a_a_b_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -184,28 +943,126 @@ define amdgpu_kernel void @fadd_a_a_b_f32(ptr addrspace(1) %out,
ret void
}
-; GCN-LABEL: {{^}}fadd_b_a_a_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-
-; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
-
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-define amdgpu_kernel void @fadd_b_a_a_f32(ptr addrspace(1) %out,
- ptr addrspace(1) %in1,
- ptr addrspace(1) %in2) #0 {
+define amdgpu_kernel void @fadd_b_a_a_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 {
+; SI-FLUSH-LABEL: fadd_b_a_a_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, 2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: fadd_b_a_a_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fadd_b_a_a_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: fadd_b_a_a_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, 2.0, v3
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: fadd_b_a_a_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: fadd_b_a_a_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: fadd_b_a_a_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX10-FLUSH-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: fadd_b_a_a_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v2, v1
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -220,20 +1077,120 @@ define amdgpu_kernel void @fadd_b_a_a_f32(ptr addrspace(1) %out,
ret void
}
-; GCN-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
-; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], -2.0, [[R1]]
-
-; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fmuladd_neg_2.0_a_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, -2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_neg_2.0_a_b_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v2, v2, -2.0, v3
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_neg_2.0_a_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v3, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_neg_2.0_a_b_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v2, -2.0, v1
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmuladd_neg_2.0_a_b_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, -2.0, v2
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_neg_2.0_a_b_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_neg_2.0_a_b_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v2, -2.0, v1
+; GFX10-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -247,25 +1204,120 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-; XXX
-; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-
-; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
-; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
-
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, 2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v2, v2, 2.0, v3
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v3, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, 2.0, v2
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_neg_2.0_neg_a_b_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v2, 2.0, v1
+; GFX10-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -281,24 +1333,120 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(ptr addrspace(1) %out, pt
ret void
}
-; GCN-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-
-; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
-; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], -2.0, [[R1]]
-
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fmuladd_2.0_neg_a_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, -2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_2.0_neg_a_b_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v2, v2, -2.0, v3
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_2.0_neg_a_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v3, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_2.0_neg_a_b_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mac_f32_e32 v2, -2.0, v1
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-MAD-LABEL: fmuladd_2.0_neg_a_b_f32:
+; GFX9-DENORM-FASTFMA-MAD: ; %bb.0:
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-MAD-NEXT: v_fma_f32 v1, v1, -2.0, v2
+; GFX9-DENORM-FASTFMA-MAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-FASTFMA-MAD-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX9-DENORM-FASTFMA-FMAC-LABEL: fmuladd_2.0_neg_a_b_f32:
+; GFX9-DENORM-FASTFMA-FMAC: ; %bb.0:
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: v_fmac_f32_e32 v2, -2.0, v1
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-DENORM-FASTFMA-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_2.0_neg_a_b_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v2, -2.0, v1
+; GFX10-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -314,23 +1462,107 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-; GCN-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-; GCN-FLUSH-MAD: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
-; GCN-FLUSH-FMAC: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
-
-; SI-FLUSH: buffer_store_dword [[RESULT]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
-
-; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
-
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fmuladd_2.0_a_neg_b_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mad_f32 v2, v2, 2.0, -v3
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-LABEL: fmuladd_2.0_a_neg_b_f32:
+; SI-DENORM-FASTFMA: ; %bb.0:
+; SI-DENORM-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-NEXT: v_fma_f32 v2, v2, 2.0, -v3
+; SI-DENORM-FASTFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fmuladd_2.0_a_neg_b_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; GFX9-FLUSH-MAD-LABEL: fmuladd_2.0_a_neg_b_f32:
+; GFX9-FLUSH-MAD: ; %bb.0:
+; GFX9-FLUSH-MAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-MAD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-MAD-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-MAD-NEXT: v_mad_f32 v1, v1, 2.0, -v2
+; GFX9-FLUSH-MAD-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-MAD-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: fmuladd_2.0_a_neg_b_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_fma_f32 v1, v1, 2.0, -v2
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX9-FLUSH-FMAC-LABEL: fmuladd_2.0_a_neg_b_f32:
+; GFX9-FLUSH-FMAC: ; %bb.0:
+; GFX9-FLUSH-FMAC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-FMAC-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-FMAC-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-FMAC-NEXT: v_fma_f32 v1, v1, 2.0, -v2
+; GFX9-FLUSH-FMAC-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-FMAC-NEXT: s_endpgm
+;
+; GFX10-LABEL: fmuladd_2.0_a_neg_b_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fma_f32 v1, v1, 2.0, -v2
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -346,23 +1578,150 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(ptr addrspace(1) %out, ptr ad
ret void
}
-; GCN-LABEL: {{^}}mad_sub_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
-; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
-
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
-
-; SI: buffer_store_dword [[RESULT]]
-; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
+; SI-FLUSH-LABEL: mad_sub_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s7, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s6, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-FLUSH-NEXT: v_mad_f32 v2, v2, v3, -v4
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: mad_sub_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: mad_sub_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: mad_sub_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, -v4
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: mad_sub_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mad_f32 v1, v1, v2, -v3
+; GFX9-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: mad_sub_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: mad_sub_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mad_f32 v1, v1, v2, -v3
+; GFX10-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: mad_sub_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
@@ -380,24 +1739,150 @@ define amdgpu_kernel void @mad_sub_f32(ptr addrspace(1) noalias nocapture %out,
ret void
}
-; GCN-LABEL: {{^}}mad_sub_inv_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
-
-; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
-
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
-
-; SI: buffer_store_dword [[RESULT]]
-; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
+; SI-FLUSH-LABEL: mad_sub_inv_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s7, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s6, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-FLUSH-NEXT: v_mad_f32 v2, -v2, v3, v4
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: mad_sub_inv_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e32 v2, v4, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: mad_sub_inv_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v4, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: mad_sub_inv_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, -v2, v3, v4
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: mad_sub_inv_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mad_f32 v1, -v1, v2, v3
+; GFX9-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: mad_sub_inv_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: v_sub_f32_e32 v1, v3, v1
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: mad_sub_inv_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mad_f32 v1, -v1, v2, v3
+; GFX10-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: mad_sub_inv_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: v_sub_f32_e32 v1, v3, v1
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
@@ -415,23 +1900,150 @@ define amdgpu_kernel void @mad_sub_inv_f32(ptr addrspace(1) noalias nocapture %o
ret void
}
-; GCN-LABEL: {{^}}mad_sub_fabs_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
-; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
-
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
-
-; SI: buffer_store_dword [[RESULT]]
-; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
+; SI-FLUSH-LABEL: mad_sub_fabs_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s7, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s6, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-FLUSH-NEXT: v_mad_f32 v2, v2, v3, -|v4|
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: mad_sub_fabs_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e64 v2, v2, |v4|
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: mad_sub_fabs_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e64 v2, v2, |v4|
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: mad_sub_fabs_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, -|v4|
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: mad_sub_fabs_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mad_f32 v1, v1, v2, -|v3|
+; GFX9-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: mad_sub_fabs_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: v_sub_f32_e64 v1, v1, |v3|
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: mad_sub_fabs_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mad_f32 v1, v1, v2, -|v3|
+; GFX10-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: mad_sub_fabs_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: v_sub_f32_e64 v1, v1, |v3|
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
@@ -450,24 +2062,150 @@ define amdgpu_kernel void @mad_sub_fabs_f32(ptr addrspace(1) noalias nocapture %
ret void
}
-; GCN-LABEL: {{^}}mad_sub_fabs_inv_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
-; GCN-FLUSH-MAD: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-; GCN-FLUSH-FMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
-
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
-
-; SI: buffer_store_dword [[RESULT]]
-; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
+; SI-FLUSH-LABEL: mad_sub_fabs_inv_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s7, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s6, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-FLUSH-NEXT: v_mad_f32 v2, -v2, v3, |v4|
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: mad_sub_fabs_inv_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e64 v2, |v4|, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: mad_sub_fabs_inv_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e64 v2, |v4|, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: mad_sub_fabs_inv_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, -v2, v3, |v4|
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: mad_sub_fabs_inv_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mad_f32 v1, -v1, v2, |v3|
+; GFX9-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: mad_sub_fabs_inv_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: v_sub_f32_e64 v1, |v3|, v1
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: mad_sub_fabs_inv_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mad_f32 v1, -v1, v2, |v3|
+; GFX10-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: mad_sub_fabs_inv_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: v_sub_f32_e64 v1, |v3|, v1
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
@@ -486,26 +2224,150 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f32(ptr addrspace(1) noalias nocaptu
ret void
}
-; GCN-LABEL: {{^}}neg_neg_mad_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
-
-; GCN-FLUSH: v_mac_f32_e32 [[REGC]], [[REGA]], [[REGB]]
-; SI-FLUSH: buffer_store_dword [[REGC]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
-
-; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]]
-; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @neg_neg_mad_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
+; SI-FLUSH-LABEL: neg_neg_mad_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s7, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s6, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-FLUSH-NEXT: v_mac_f32_e32 v4, v2, v3
+; SI-FLUSH-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: neg_neg_mad_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v4, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: neg_neg_mad_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v4, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: neg_neg_mad_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, v3, v4
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: neg_neg_mad_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mac_f32_e32 v3, v1, v2
+; GFX9-FLUSH-NEXT: global_store_dword v0, v3, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: neg_neg_mad_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: neg_neg_mad_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mac_f32_e32 v3, v1, v2
+; GFX10-FLUSH-NEXT: global_store_dword v0, v3, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: neg_neg_mad_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v3, v1
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
@@ -525,23 +2387,150 @@ define amdgpu_kernel void @neg_neg_mad_f32(ptr addrspace(1) noalias nocapture %o
ret void
}
-; GCN-LABEL: {{^}}mad_fabs_sub_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
-; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
-
-; GCN-DENORM-STRICT: v_mul_f32_e64 [[TMP:v[0-9]+]], [[REGA]], |[[REGB]]|
-; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
-
-; SI: buffer_store_dword [[RESULT]]
-; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture readonly %ptr) #0 {
+; SI-FLUSH-LABEL: mad_fabs_sub_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s7, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s6, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-FLUSH-NEXT: v_mad_f32 v2, v2, |v3|, -v4
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: mad_fabs_sub_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mul_f32_e64 v2, v2, |v3|
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: mad_fabs_sub_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-SLOWFMA-NEXT: v_mul_f32_e64 v2, v2, |v3|
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v2, v4
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: mad_fabs_sub_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s7, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s6, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[4:5], s[2:3]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b64 s[2:3], s[6:7]
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, |v3|, -v4
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: mad_fabs_sub_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mad_f32 v1, v1, |v2|, -v3
+; GFX9-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: mad_fabs_sub_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mul_f32_e64 v1, v1, |v2|
+; GFX9-DENORM-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: mad_fabs_sub_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mad_f32 v1, v1, |v2|, -v3
+; GFX10-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: mad_fabs_sub_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[2:3] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v3, v0, s[2:3] offset:8 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f32_e64 v1, v1, |v2|
+; GFX10-DENORM-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tid.ext = sext i32 %tid to i64
%gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 %tid.ext
@@ -560,24 +2549,126 @@ define amdgpu_kernel void @mad_fabs_sub_f32(ptr addrspace(1) noalias nocapture %
ret void
}
-; GCN-LABEL: {{^}}fsub_c_fadd_a_a_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
-; SI-FLUSH: buffer_store_dword [[R2]]
-; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
-
-; SI-DENORM: buffer_store_dword [[RESULT]]
-; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fsub_c_fadd_a_a_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fsub_c_fadd_a_a_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mac_f32_e32 v3, -2.0, v2
+; SI-FLUSH-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: fsub_c_fadd_a_a_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e32 v2, v3, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fsub_c_fadd_a_a_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v3, v2
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: fsub_c_fadd_a_a_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, -2.0, v3
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: fsub_c_fadd_a_a_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mac_f32_e32 v2, -2.0, v1
+; GFX9-FLUSH-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: fsub_c_fadd_a_a_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX9-DENORM-NEXT: v_sub_f32_e32 v1, v2, v1
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: fsub_c_fadd_a_a_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mac_f32_e32 v2, -2.0, v1
+; GFX10-FLUSH-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: fsub_c_fadd_a_a_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX10-DENORM-NEXT: v_sub_f32_e32 v1, v2, v1
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
@@ -593,22 +2684,126 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; GCN-LABEL: {{^}}fsub_fadd_a_a_c_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
-; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
-; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
-
-; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
-
-; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-SLOWFMA-CONTRACT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; GCN-DENORM-STRICT: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
-; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
-
-; SI: buffer_store_dword [[RESULT]]
-; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define amdgpu_kernel void @fsub_fadd_a_a_c_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-FLUSH-LABEL: fsub_fadd_a_a_c_f32:
+; SI-FLUSH: ; %bb.0:
+; SI-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-FLUSH-NEXT: s_mov_b32 s3, 0xf000
+; SI-FLUSH-NEXT: s_mov_b32 s2, 0
+; SI-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-FLUSH-NEXT: v_mov_b32_e32 v1, 0
+; SI-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; SI-FLUSH-NEXT: v_mad_f32 v2, v2, 2.0, -v3
+; SI-FLUSH-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-FLUSH-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-STRICT-LABEL: fsub_fadd_a_a_c_f32:
+; SI-DENORM-FASTFMA-STRICT: ; %bb.0:
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-FASTFMA-STRICT-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-DENORM-FASTFMA-STRICT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-STRICT-NEXT: s_endpgm
+;
+; SI-DENORM-SLOWFMA-LABEL: fsub_fadd_a_a_c_f32:
+; SI-DENORM-SLOWFMA: ; %bb.0:
+; SI-DENORM-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-SLOWFMA-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-SLOWFMA-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-SLOWFMA-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-SLOWFMA-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-SLOWFMA-NEXT: v_add_f32_e32 v2, v2, v2
+; SI-DENORM-SLOWFMA-NEXT: v_sub_f32_e32 v2, v2, v3
+; SI-DENORM-SLOWFMA-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-SLOWFMA-NEXT: s_endpgm
+;
+; SI-DENORM-FASTFMA-CONTRACT-LABEL: fsub_fadd_a_a_c_f32:
+; SI-DENORM-FASTFMA-CONTRACT: ; %bb.0:
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s3, 0xf000
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_mov_b32 s2, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_mov_b32_e32 v1, 0
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_load_dword v3, v[0:1], s[0:3], 0 addr64 offset:4 glc
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: v_fma_f32 v2, v2, 2.0, -v3
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-DENORM-FASTFMA-CONTRACT-NEXT: s_endpgm
+;
+; GFX9-FLUSH-LABEL: fsub_fadd_a_a_c_f32:
+; GFX9-FLUSH: ; %bb.0:
+; GFX9-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX9-FLUSH-NEXT: v_mad_f32 v1, v1, 2.0, -v2
+; GFX9-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-FLUSH-NEXT: s_endpgm
+;
+; GFX9-DENORM-LABEL: fsub_fadd_a_a_c_f32:
+; GFX9-DENORM: ; %bb.0:
+; GFX9-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX9-DENORM-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX9-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-DENORM-NEXT: s_endpgm
+;
+; GFX10-FLUSH-LABEL: fsub_fadd_a_a_c_f32:
+; GFX10-FLUSH: ; %bb.0:
+; GFX10-FLUSH-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-FLUSH-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-FLUSH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLUSH-NEXT: v_mad_f32 v1, v1, 2.0, -v2
+; GFX10-FLUSH-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-FLUSH-NEXT: s_endpgm
+;
+; GFX10-DENORM-LABEL: fsub_fadd_a_a_c_f32:
+; GFX10-DENORM: ; %bb.0:
+; GFX10-DENORM-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v0, s[0:1] offset:4 glc dlc
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v1
+; GFX10-DENORM-NEXT: v_sub_f32_e32 v1, v1, v2
+; GFX10-DENORM-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-DENORM-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %out, i32 %tid
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll b/llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll
new file mode 100644
index 0000000..892955c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll
@@ -0,0 +1,98 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
+
+; CHECK: error: couldn't allocate output register for constraint '{v256}'
+define void @out_of_bounds_vgpr32_def() {
+ %v = tail call i32 asm sideeffect "v_mov_b32 $0, -1", "={v256}"()
+ ret void
+}
+
+; CHECK: error: couldn't allocate output register for constraint '{v[255:256]}'
+define void @out_of_bounds_vgpr64_def_high_tuple() {
+ %v = tail call i32 asm sideeffect "v_mov_b32 $0, -1", "={v[255:256]}"()
+ ret void
+}
+
+; CHECK: error: couldn't allocate output register for constraint '{v[256:257]}'
+define void @out_of_bounds_vgpr64_def_low_tuple() {
+ %v = tail call i32 asm sideeffect "v_mov_b32 $0, -1", "={v[256:257]}"()
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v256}'
+define void @out_of_bounds_vgpr32_use() {
+ %v = tail call i32 asm sideeffect "v_mov_b32 %0, %1", "=v,{v256}"(i32 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[255:256]}'
+define void @out_of_bounds_vgpr64_high_tuple() {
+ tail call void asm sideeffect "; use %0", "{v[255:256]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[256:257]}'
+define void @out_of_bounds_vgpr64_low_tuple() {
+ tail call void asm sideeffect "; use %0", "{v[256:257]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[1:0]}'
+define void @vgpr_tuple_swapped() {
+ tail call void asm sideeffect "; use %0", "{v[1:0]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v4294967295}'
+define void @vgpr_uintmax() {
+ tail call void asm sideeffect "; use %0", "{v4294967295}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v4294967296}'
+define void @vgpr_uintmax_p1() {
+ tail call void asm sideeffect "; use %0", "{v4294967296}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[4294967295:4294967296]}'
+define void @vgpr_tuple_uintmax() {
+ tail call void asm sideeffect "; use %0", "{v[4294967295:4294967296]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[0:4294967295]}'
+define void @vgpr_tuple_0_uintmax() {
+ tail call void asm sideeffect "; use %0", "{v[0:4294967295]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[0:4294967296]}'
+define void @vgpr_tuple_0_uintmax_p1() {
+ tail call void asm sideeffect "; use %0", "{v[0:4294967296]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[4294967264:4294967295]}'
+define void @vgpr32_last_is_uintmax() {
+ tail call void asm sideeffect "; use %0", "{v[4294967264:4294967295]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[4294967265:4294967296]}'
+define void @vgpr32_last_is_uintmax_p1() {
+ tail call void asm sideeffect "; use %0", "{v[4294967265:4294967296]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[2:2147483651]}'
+define void @overflow_bitwidth_0() {
+ tail call void asm sideeffect "; use %0", "{v[2:2147483651]}"(i64 123)
+ ret void
+}
+
+; CHECK: error: couldn't allocate input reg for constraint '{v[2147483635:2147483651]}'
+define void @overflow_bitwidth_1() {
+ tail call void asm sideeffect "; use %0", "{v[2147483635:2147483651]}"(i64 123)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index bad2e60..a2da887 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1025,67 +1025,74 @@ define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16>
;
; EG-LABEL: v3i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @10, KC0[], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT: ALU 0, @12, KC0[], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 44, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 48, #3
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T5.X, 0.0,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T2.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT: LSHL T5.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T2.Y, 0.0,
-; EG-NEXT: MOV * T2.Z, 0.0,
-; EG-NEXT: LSHR T0.X, T0.W, literal.x,
-; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT: MOV T5.Y, 0.0,
+; EG-NEXT: MOV * T5.Z, 0.0,
+; EG-NEXT: LSHR T8.X, T0.W, literal.x,
+; EG-NEXT: LSHL T0.W, T7.X, literal.y,
+; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: v3i16_arg:
; CM: ; %bb.0: ; %entry
; CM-NEXT: ALU 0, @12, KC0[], KC1[]
-; CM-NEXT: TEX 0 @8
-; CM-NEXT: ALU 13, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT MSKOR T1.XW, T2.X
-; CM-NEXT: ALU 1, @27, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 0 @10
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 8:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3
-; CM-NEXT: Fetch clause starting at 10:
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
+; CM-NEXT: VTX_READ_16 T7.X, T5.X, 46, #3
+; CM-NEXT: VTX_READ_16 T5.X, T5.X, 48, #3
; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: MOV * T5.X, 0.0,
; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT: AND_INT T0.Z, T1.X, literal.x,
+; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT: LSHL T1.X, PV.Z, PV.W,
-; CM-NEXT: LSHL * T1.W, literal.x, PV.W,
+; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: MOV T1.Y, 0.0,
-; CM-NEXT: MOV * T1.Z, 0.0,
-; CM-NEXT: LSHR * T2.X, T0.W, literal.x,
+; CM-NEXT: MOV T5.Y, 0.0,
+; CM-NEXT: MOV * T5.Z, 0.0,
+; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
+; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
+; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: ALU clause starting at 27:
-; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
store <3 x i16> %in, ptr addrspace(1) %out, align 4
@@ -2669,47 +2676,205 @@ define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) {
;
; EG-LABEL: v8i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @14, KC0[], KC1[]
-; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
+; EG-NEXT: ALU 1, @36, KC0[], KC1[]
+; EG-NEXT: TEX 0 @20
+; EG-NEXT: ALU 5, @38, KC0[], KC1[]
+; EG-NEXT: TEX 0 @22
+; EG-NEXT: ALU 5, @44, KC0[], KC1[]
+; EG-NEXT: TEX 0 @24
+; EG-NEXT: ALU 5, @50, KC0[], KC1[]
+; EG-NEXT: TEX 0 @26
+; EG-NEXT: ALU 5, @56, KC0[], KC1[]
+; EG-NEXT: TEX 0 @28
+; EG-NEXT: ALU 5, @62, KC0[], KC1[]
+; EG-NEXT: TEX 0 @30
+; EG-NEXT: ALU 5, @68, KC0[], KC1[]
+; EG-NEXT: TEX 0 @32
+; EG-NEXT: ALU 5, @74, KC0[], KC1[]
+; EG-NEXT: TEX 0 @34
+; EG-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
-; EG-NEXT: ALU clause starting at 14:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: MOV T1.Y, T2.X,
-; EG-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
-; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.W, T3.X,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: Fetch clause starting at 20:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
+; EG-NEXT: Fetch clause starting at 22:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
+; EG-NEXT: Fetch clause starting at 24:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
+; EG-NEXT: Fetch clause starting at 26:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
+; EG-NEXT: Fetch clause starting at 28:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
+; EG-NEXT: Fetch clause starting at 30:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
+; EG-NEXT: Fetch clause starting at 32:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
+; EG-NEXT: Fetch clause starting at 34:
+; EG-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
+; EG-NEXT: ALU clause starting at 36:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T7.X, 0.0,
+; EG-NEXT: ALU clause starting at 38:
+; EG-NEXT: LSHL T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 44:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: ALU clause starting at 50:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 56:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 62:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 68:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T8.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 74:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T8.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 80:
+; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT: AND_INT * T1.W, T7.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T7.X, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.X,
+; EG-NEXT: MOV * T7.W, T3.X,
+; EG-NEXT: MOV * T7.Y, T5.X,
;
; CM-LABEL: v8i16_arg:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @14, KC0[], KC1[]
-; CM-NEXT: TEX 3 @6
-; CM-NEXT: ALU 4, @15, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 1, @36, KC0[], KC1[]
+; CM-NEXT: TEX 0 @20
+; CM-NEXT: ALU 5, @38, KC0[], KC1[]
+; CM-NEXT: TEX 0 @22
+; CM-NEXT: ALU 5, @44, KC0[], KC1[]
+; CM-NEXT: TEX 0 @24
+; CM-NEXT: ALU 5, @50, KC0[], KC1[]
+; CM-NEXT: TEX 0 @26
+; CM-NEXT: ALU 5, @56, KC0[], KC1[]
+; CM-NEXT: TEX 0 @28
+; CM-NEXT: ALU 5, @62, KC0[], KC1[]
+; CM-NEXT: TEX 0 @30
+; CM-NEXT: ALU 5, @68, KC0[], KC1[]
+; CM-NEXT: TEX 0 @32
+; CM-NEXT: ALU 5, @74, KC0[], KC1[]
+; CM-NEXT: TEX 0 @34
+; CM-NEXT: ALU 8, @80, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T8.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 52, #3
-; CM-NEXT: VTX_READ_16 T2.X, T0.X, 54, #3
-; CM-NEXT: VTX_READ_16 T3.X, T0.X, 62, #3
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 60, #3
-; CM-NEXT: ALU clause starting at 14:
-; CM-NEXT: MOV * T0.X, 0.0,
-; CM-NEXT: ALU clause starting at 15:
-; CM-NEXT: MOV T1.Y, T2.X,
-; CM-NEXT: MOV * T1.Z, T0.X, BS:VEC_120/SCL_212
-; CM-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: MOV * T1.W, T3.X,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: Fetch clause starting at 20:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
+; CM-NEXT: Fetch clause starting at 22:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
+; CM-NEXT: Fetch clause starting at 24:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
+; CM-NEXT: Fetch clause starting at 26:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
+; CM-NEXT: Fetch clause starting at 28:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
+; CM-NEXT: Fetch clause starting at 30:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
+; CM-NEXT: Fetch clause starting at 32:
+; CM-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
+; CM-NEXT: Fetch clause starting at 34:
+; CM-NEXT: VTX_READ_16 T7.X, T7.X, 52, #3
+; CM-NEXT: ALU clause starting at 36:
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: MOV * T7.X, 0.0,
+; CM-NEXT: ALU clause starting at 38:
+; CM-NEXT: LSHL T0.Z, T8.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 44:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: ALU clause starting at 50:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 56:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 62:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 68:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T8.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 74:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T8.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T7.Z, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.Z,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 80:
+; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT: AND_INT * T0.W, T7.X, literal.z,
+; CM-NEXT: 2(2.802597e-45), -65536(nan)
+; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT: OR_INT * T7.X, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.X,
+; CM-NEXT: MOV * T7.W, T3.X,
+; CM-NEXT: MOV * T7.Y, T5.X,
entry:
store <8 x i16> %in, ptr addrspace(1) %out
ret void
@@ -3453,68 +3618,392 @@ define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) {
;
; EG-LABEL: v16i16_arg:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @22, KC0[], KC1[]
-; EG-NEXT: TEX 7 @6
-; EG-NEXT: ALU 10, @23, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T0.X, 1
+; EG-NEXT: ALU 1, @68, KC0[], KC1[]
+; EG-NEXT: TEX 0 @36
+; EG-NEXT: ALU 5, @70, KC0[], KC1[]
+; EG-NEXT: TEX 0 @38
+; EG-NEXT: ALU 5, @76, KC0[], KC1[]
+; EG-NEXT: TEX 0 @40
+; EG-NEXT: ALU 5, @82, KC0[], KC1[]
+; EG-NEXT: TEX 0 @42
+; EG-NEXT: ALU 5, @88, KC0[], KC1[]
+; EG-NEXT: TEX 0 @44
+; EG-NEXT: ALU 5, @94, KC0[], KC1[]
+; EG-NEXT: TEX 0 @46
+; EG-NEXT: ALU 5, @100, KC0[], KC1[]
+; EG-NEXT: TEX 0 @48
+; EG-NEXT: ALU 5, @106, KC0[], KC1[]
+; EG-NEXT: TEX 0 @50
+; EG-NEXT: ALU 5, @112, KC0[], KC1[]
+; EG-NEXT: TEX 0 @52
+; EG-NEXT: ALU 5, @118, KC0[], KC1[]
+; EG-NEXT: TEX 0 @54
+; EG-NEXT: ALU 5, @124, KC0[], KC1[]
+; EG-NEXT: TEX 0 @56
+; EG-NEXT: ALU 5, @130, KC0[], KC1[]
+; EG-NEXT: TEX 0 @58
+; EG-NEXT: ALU 5, @136, KC0[], KC1[]
+; EG-NEXT: TEX 0 @60
+; EG-NEXT: ALU 5, @142, KC0[], KC1[]
+; EG-NEXT: TEX 0 @62
+; EG-NEXT: ALU 5, @148, KC0[], KC1[]
+; EG-NEXT: TEX 0 @64
+; EG-NEXT: ALU 5, @154, KC0[], KC1[]
+; EG-NEXT: TEX 0 @66
+; EG-NEXT: ALU 13, @160, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T14.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T13.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 94, #3
-; EG-NEXT: VTX_READ_16 T4.X, T0.X, 78, #3
-; EG-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
-; EG-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
-; EG-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
-; EG-NEXT: ALU clause starting at 22:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 23:
-; EG-NEXT: MOV T1.Y, T2.X,
-; EG-NEXT: MOV * T7.Y, T0.X,
-; EG-NEXT: MOV * T1.Z, T6.X,
-; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV T7.Z, T5.X,
+; EG-NEXT: Fetch clause starting at 36:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
+; EG-NEXT: Fetch clause starting at 38:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
+; EG-NEXT: Fetch clause starting at 40:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
+; EG-NEXT: Fetch clause starting at 42:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
+; EG-NEXT: Fetch clause starting at 44:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
+; EG-NEXT: Fetch clause starting at 46:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
+; EG-NEXT: Fetch clause starting at 48:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
+; EG-NEXT: Fetch clause starting at 50:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
+; EG-NEXT: Fetch clause starting at 52:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
+; EG-NEXT: Fetch clause starting at 54:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
+; EG-NEXT: Fetch clause starting at 56:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
+; EG-NEXT: Fetch clause starting at 58:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
+; EG-NEXT: Fetch clause starting at 60:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
+; EG-NEXT: Fetch clause starting at 62:
+; EG-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
+; EG-NEXT: Fetch clause starting at 64:
+; EG-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
+; EG-NEXT: Fetch clause starting at 66:
+; EG-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
+; EG-NEXT: ALU clause starting at 68:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T11.X, 0.0,
+; EG-NEXT: ALU clause starting at 70:
+; EG-NEXT: LSHL T0.W, T12.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 76:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T7.X,
+; EG-NEXT: ALU clause starting at 82:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T7.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T9.X,
+; EG-NEXT: ALU clause starting at 88:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: ALU clause starting at 94:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 100:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T7.X,
+; EG-NEXT: ALU clause starting at 106:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T7.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T9.X,
+; EG-NEXT: ALU clause starting at 112:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 118:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 124:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T6.X,
+; EG-NEXT: ALU clause starting at 130:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T6.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: ALU clause starting at 136:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T1.W, T12.X, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 142:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T12.Z, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 148:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T12.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T12.X, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.X,
+; EG-NEXT: MOV * T0.Y, T6.X,
+; EG-NEXT: ALU clause starting at 154:
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T13.X, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T11.Z, PV.W, PS,
+; EG-NEXT: MOV T6.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: ALU clause starting at 160:
+; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T2.X, PV.W, literal.x,
-; EG-NEXT: MOV T7.W, T4.X,
-; EG-NEXT: MOV * T1.W, T3.X,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR T14.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT: AND_INT * T1.W, T11.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T11.X, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.X,
+; EG-NEXT: MOV * T12.W, T3.X,
+; EG-NEXT: MOV T12.Y, T5.X,
+; EG-NEXT: MOV T11.W, T7.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T11.Y, T9.X,
;
; CM-LABEL: v16i16_arg:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @22, KC0[], KC1[]
-; CM-NEXT: TEX 7 @6
-; CM-NEXT: ALU 11, @23, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T2.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X
+; CM-NEXT: ALU 1, @68, KC0[], KC1[]
+; CM-NEXT: TEX 0 @36
+; CM-NEXT: ALU 5, @70, KC0[], KC1[]
+; CM-NEXT: TEX 0 @38
+; CM-NEXT: ALU 5, @76, KC0[], KC1[]
+; CM-NEXT: TEX 0 @40
+; CM-NEXT: ALU 5, @82, KC0[], KC1[]
+; CM-NEXT: TEX 0 @42
+; CM-NEXT: ALU 5, @88, KC0[], KC1[]
+; CM-NEXT: TEX 0 @44
+; CM-NEXT: ALU 5, @94, KC0[], KC1[]
+; CM-NEXT: TEX 0 @46
+; CM-NEXT: ALU 5, @100, KC0[], KC1[]
+; CM-NEXT: TEX 0 @48
+; CM-NEXT: ALU 5, @106, KC0[], KC1[]
+; CM-NEXT: TEX 0 @50
+; CM-NEXT: ALU 5, @112, KC0[], KC1[]
+; CM-NEXT: TEX 0 @52
+; CM-NEXT: ALU 5, @118, KC0[], KC1[]
+; CM-NEXT: TEX 0 @54
+; CM-NEXT: ALU 5, @124, KC0[], KC1[]
+; CM-NEXT: TEX 0 @56
+; CM-NEXT: ALU 5, @130, KC0[], KC1[]
+; CM-NEXT: TEX 0 @58
+; CM-NEXT: ALU 5, @136, KC0[], KC1[]
+; CM-NEXT: TEX 0 @60
+; CM-NEXT: ALU 5, @142, KC0[], KC1[]
+; CM-NEXT: TEX 0 @62
+; CM-NEXT: ALU 5, @148, KC0[], KC1[]
+; CM-NEXT: TEX 0 @64
+; CM-NEXT: ALU 5, @154, KC0[], KC1[]
+; CM-NEXT: TEX 0 @66
+; CM-NEXT: ALU 14, @160, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T14.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T13.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 84, #3
-; CM-NEXT: VTX_READ_16 T2.X, T0.X, 86, #3
-; CM-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
-; CM-NEXT: VTX_READ_16 T4.X, T0.X, 94, #3
-; CM-NEXT: VTX_READ_16 T5.X, T0.X, 76, #3
-; CM-NEXT: VTX_READ_16 T6.X, T0.X, 92, #3
-; CM-NEXT: VTX_READ_16 T7.X, T0.X, 68, #3
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 70, #3
-; CM-NEXT: ALU clause starting at 22:
-; CM-NEXT: MOV * T0.X, 0.0,
-; CM-NEXT: ALU clause starting at 23:
-; CM-NEXT: MOV * T1.Y, T2.X,
-; CM-NEXT: MOV T7.Y, T0.X,
-; CM-NEXT: MOV T1.Z, T6.X, BS:VEC_120/SCL_212
+; CM-NEXT: Fetch clause starting at 36:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 98, #3
+; CM-NEXT: Fetch clause starting at 38:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 90, #3
+; CM-NEXT: Fetch clause starting at 40:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 82, #3
+; CM-NEXT: Fetch clause starting at 42:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 74, #3
+; CM-NEXT: Fetch clause starting at 44:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 96, #3
+; CM-NEXT: Fetch clause starting at 46:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 88, #3
+; CM-NEXT: Fetch clause starting at 48:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 80, #3
+; CM-NEXT: Fetch clause starting at 50:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 72, #3
+; CM-NEXT: Fetch clause starting at 52:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 94, #3
+; CM-NEXT: Fetch clause starting at 54:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 86, #3
+; CM-NEXT: Fetch clause starting at 56:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 78, #3
+; CM-NEXT: Fetch clause starting at 58:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 70, #3
+; CM-NEXT: Fetch clause starting at 60:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 92, #3
+; CM-NEXT: Fetch clause starting at 62:
+; CM-NEXT: VTX_READ_16 T12.X, T11.X, 84, #3
+; CM-NEXT: Fetch clause starting at 64:
+; CM-NEXT: VTX_READ_16 T13.X, T11.X, 76, #3
+; CM-NEXT: Fetch clause starting at 66:
+; CM-NEXT: VTX_READ_16 T11.X, T11.X, 68, #3
+; CM-NEXT: ALU clause starting at 68:
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: MOV * T11.X, 0.0,
+; CM-NEXT: ALU clause starting at 70:
+; CM-NEXT: LSHL T0.Z, T12.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 76:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T7.X,
+; CM-NEXT: ALU clause starting at 82:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T7.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: ALU clause starting at 88:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T3.X,
+; CM-NEXT: ALU clause starting at 94:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T3.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: ALU clause starting at 100:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T7.X,
+; CM-NEXT: ALU clause starting at 106:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T7.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: ALU clause starting at 112:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 118:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 124:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T6.X,
+; CM-NEXT: ALU clause starting at 130:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T6.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: ALU clause starting at 136:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T12.X, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T2.X,
+; CM-NEXT: ALU clause starting at 142:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T12.Z, PV.Z, PV.W,
+; CM-NEXT: MOV T2.X, PV.Z,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: ALU clause starting at 148:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T12.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T12.X, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.X,
+; CM-NEXT: MOV * T0.Y, T6.X,
+; CM-NEXT: ALU clause starting at 154:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, T13.X, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T11.Z, PV.Z, PV.W,
+; CM-NEXT: MOV T6.X, PV.Z,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: ALU clause starting at 160:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T0.X, PV.W, literal.x,
-; CM-NEXT: MOV T7.Z, T5.X,
-; CM-NEXT: MOV * T1.W, T4.X, BS:VEC_120/SCL_212
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR T2.X, KC0[2].Y, literal.x,
-; CM-NEXT: MOV * T7.W, T3.X,
+; CM-NEXT: LSHR * T13.X, PV.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; CM-NEXT: AND_INT * T0.W, T11.X, literal.z,
+; CM-NEXT: 2(2.802597e-45), -65536(nan)
+; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT: OR_INT * T11.X, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.X,
+; CM-NEXT: MOV * T12.W, T3.X,
+; CM-NEXT: MOV T12.Y, T5.X,
+; CM-NEXT: MOV * T11.W, T7.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV * T11.Y, T9.X,
entry:
store <16 x i16> %in, ptr addrspace(1) %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
index 4309cfbe..c29c52c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scale.pk.ll
@@ -11,6 +11,12 @@ declare <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.fp4(i32 %src, i32 %scale, i
declare <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %src, i32 %scale, i32 %scale_sel)
declare <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.bf8(<2 x i32> %src, i32 %scale, i32 %scale_sel)
declare <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp4(i32 %src, i32 %scale, i32 %scale_sel)
+declare <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> %src, i32 %scale, i32 %scale_sel)
+declare <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x i32> %src, i32 %scale, i32 %scale_sel)
+declare <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> %src, i32 %scale, i32 %scale_sel)
+declare <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 %scale, i32 %scale_sel)
+declare <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> %src, i32 %scale, i32 %scale_sel)
+declare <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.bf6(<3 x i32> %src, i32 %scale, i32 %scale_sel)
define amdgpu_ps void @test_cvt_scale_pk8_f16_fp8_vv(<2 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
; GFX1250-SDAG-LABEL: test_cvt_scale_pk8_f16_fp8_vv:
@@ -162,3 +168,207 @@ define amdgpu_ps void @test_cvt_scale_pk8_f32_fp4_vv(i32 %src, i32 %scale, ptr a
store <8 x float> %cvt, ptr addrspace(1) %out, align 32
ret void
}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_fp6_vv(<3 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_fp6_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scale_pk16_f16_fp6 v[6:13], v[0:2], v3
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_fp6_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scale_pk16_f16_fp6 v[6:13], v[0:2], v3
+; GFX1250-GISEL-NEXT: s_clause 0x1
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> %src, i32 %scale, i32 0)
+ store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_fp6_sl(<3 x i32> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_fp6_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v12, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scale_pk16_f16_fp6 v[2:9], v[10:12], 0x64 scale_sel:1
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_fp6_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v10, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scale_pk16_f16_fp6 v[2:9], v[10:12], 0x64 scale_sel:1
+; GFX1250-GISEL-NEXT: s_clause 0x1
+; GFX1250-GISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-GISEL-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> %src, i32 100, i32 1)
+ store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_fp6_vv(<3 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_fp6_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scale_pk16_bf16_fp6 v[6:13], v[0:2], v3 scale_sel:2
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x i32> %src, i32 %scale, i32 2)
+ store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_fp6_sl(<3 x i32> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_fp6_sl:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-NEXT: v_mov_b32_e32 v12, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_scale_pk16_bf16_fp6 v[2:9], v[10:12], 0x64 scale_sel:3
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x i32> %src, i32 100, i32 3)
+ store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_bf6_vv(<3 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_bf6_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scale_pk16_f16_bf6 v[6:13], v[0:2], v3 scale_sel:4
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_bf6_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scale_pk16_f16_bf6 v[6:13], v[0:2], v3 scale_sel:4
+; GFX1250-GISEL-NEXT: s_clause 0x1
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> %src, i32 %scale, i32 4)
+ store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f16_bf6_sl(<3 x i32> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f16_bf6_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v12, s2
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scale_pk16_f16_bf6 v[2:9], v[10:12], 0x64 scale_sel:5
+; GFX1250-SDAG-NEXT: s_clause 0x1
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-SDAG-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f16_bf6_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
+; GFX1250-GISEL-NEXT: v_mov_b32_e32 v10, s0
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scale_pk16_f16_bf6 v[2:9], v[10:12], 0x64 scale_sel:5
+; GFX1250-GISEL-NEXT: s_clause 0x1
+; GFX1250-GISEL-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-GISEL-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> %src, i32 100, i32 5)
+ store <16 x half> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_bf6_vv(<3 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_bf6_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scale_pk16_bf16_bf6 v[6:13], v[0:2], v3 scale_sel:6
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 %scale, i32 6)
+ store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_bf16_bf6_sl(<3 x i32> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_cvt_scale_pk16_bf16_bf6_sl:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_mov_b32 v11, s1
+; GFX1250-NEXT: v_mov_b32_e32 v12, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_scale_pk16_bf16_bf6 v[2:9], v[10:12], 0x64 scale_sel:7
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16
+; GFX1250-NEXT: global_store_b128 v[0:1], v[2:5], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %src, i32 100, i32 7)
+ store <16 x bfloat> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f32_fp6_vv(<3 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f32_fp6_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scale_pk16_f32_fp6 v[6:21], v[0:2], v3 scale_sel:5
+; GFX1250-SDAG-NEXT: s_clause 0x3
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f32_fp6_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scale_pk16_f32_fp6 v[6:21], v[0:2], v3 scale_sel:5
+; GFX1250-GISEL-NEXT: s_clause 0x3
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> %src, i32 %scale, i32 5)
+ store <16 x float> %cvt, ptr addrspace(1) %out, align 16
+ ret void
+}
+
+define amdgpu_ps void @test_cvt_scale_pk16_f32_bf6_vv(<3 x i32> %src, i32 %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_cvt_scale_pk16_f32_bf6_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scale_pk16_f32_bf6 v[6:21], v[0:2], v3 scale_sel:6
+; GFX1250-SDAG-NEXT: s_clause 0x3
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_cvt_scale_pk16_f32_bf6_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scale_pk16_f32_bf6 v[6:21], v[0:2], v3 scale_sel:6
+; GFX1250-GISEL-NEXT: s_clause 0x3
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[10:13], off offset:16
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[14:17], off offset:32
+; GFX1250-GISEL-NEXT: global_store_b128 v[4:5], v[18:21], off offset:48
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.bf6(<3 x i32> %src, i32 %scale, i32 6)
+ store <16 x float> %cvt, ptr addrspace(1) %out, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk16.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk16.gfx1250.ll
new file mode 100644
index 0000000..dfb9089
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk16.gfx1250.ll
@@ -0,0 +1,303 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
+
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f32(<16 x float> %src, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f32(<16 x float> %src, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.bf16(<16 x bfloat> %src, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f16(<16 x half> %src, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.bf16(<16 x bfloat> %src, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f16(<16 x half> %src, float %scale)
+
+; bf6 <- f32 with non-inreg %src/%scale: expects one v_cvt_scalef32_pk16_bf6_f32 and a 96-bit store.
+define amdgpu_ps void @test_scalef32_pk16_bf6_f32_vv(<16 x float> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_bf6_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v23, v18 :: v_dual_mov_b32 v22, v17
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_bf6_f32 v[18:20], v[0:15], v16
+; GFX1250-SDAG-NEXT: global_store_b96 v[22:23], v[18:20], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_bf6_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v22, v17 :: v_dual_mov_b32 v23, v18
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_bf6_f32 v[18:20], v[0:15], v16
+; GFX1250-GISEL-NEXT: global_store_b96 v[22:23], v[18:20], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f32(<16 x float> %src, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf6 <- f32 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s15 into v[2:17] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk16_bf6_f32_sl(<16 x float> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_bf6_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v11, s9
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v13, s11
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v15, s13
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v17, s15
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_bf6_f32 v[18:20], v[2:17], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_bf6_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_bf6_f32 v[18:20], v[2:17], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f32(<16 x float> %src, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp6 <- f32 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk16_fp6_f32 and store the <3 x i32> result with global_store_b96.
+define amdgpu_ps void @test_scalef32_pk16_fp6_f32_vv(<16 x float> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_fp6_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v23, v18 :: v_dual_mov_b32 v22, v17
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_fp6_f32 v[18:20], v[0:15], v16
+; GFX1250-SDAG-NEXT: global_store_b96 v[22:23], v[18:20], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_fp6_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v22, v17 :: v_dual_mov_b32 v23, v18
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_fp6_f32 v[18:20], v[0:15], v16
+; GFX1250-GISEL-NEXT: global_store_b96 v[22:23], v[18:20], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f32(<16 x float> %src, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp6 <- f32 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s15 into v[2:17] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk16_fp6_f32_sl(<16 x float> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_fp6_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v11, s9
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v13, s11
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v15, s13
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v17, s15
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_fp6_f32 v[18:20], v[2:17], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_fp6_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_fp6_f32 v[18:20], v[2:17], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f32(<16 x float> %src, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf6 <- bf16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk16_bf6_bf16 and store the <3 x i32> result with global_store_b96.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk16_bf6_bf16_vv(<16 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_bf6_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v14, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_bf6_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v14, v9
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.bf16(<16 x bfloat> %src, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf6 <- bf16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9] and the scale to be encoded as the literal 0x42c80000.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk16_bf6_bf16_sl(<16 x bfloat> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_bf6_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_bf6_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.bf16(<16 x bfloat> %src, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf6 <- f16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk16_bf6_f16 and store the <3 x i32> result with global_store_b96.
+define amdgpu_ps void @test_scalef32_pk16_bf6_f16_vv(<16 x half> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_bf6_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v14, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_bf6_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v14, v9 :: v_dual_mov_b32 v15, v10
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f16(<16 x half> %src, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf6 <- f16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk16_bf6_f16_sl(<16 x half> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_bf6_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_bf6_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f16(<16 x half> %src, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp6 <- bf16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk16_fp6_bf16 and store the <3 x i32> result with global_store_b96.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk16_fp6_bf16_vv(<16 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_fp6_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v14, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_fp6_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v14, v9
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.bf16(<16 x bfloat> %src, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp6 <- bf16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9] and the scale to be encoded as the literal 0x42c80000.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk16_fp6_bf16_sl(<16 x bfloat> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_fp6_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_fp6_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.bf16(<16 x bfloat> %src, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp6 <- f16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk16_fp6_f16 and store the <3 x i32> result with global_store_b96.
+define amdgpu_ps void @test_scalef32_pk16_fp6_f16_vv(<16 x half> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_fp6_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v15, v10 :: v_dual_mov_b32 v14, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_fp6_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v14, v9 :: v_dual_mov_b32 v15, v10
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b96 v[14:15], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f16(<16 x half> %src, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp6 <- f16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk16_fp6_f16_sl(<16 x half> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk16_fp6_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk16_fp6_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f16(<16 x half> %src, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll
new file mode 100644
index 0000000..cd0b081
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
+
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %src, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %src, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %src, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %src, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %src, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %src, float %scale)
+declare i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %src, float %scale)
+declare i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %src, float %scale)
+declare i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %src, float %scale)
+
+; fp8 <- bf16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk8_fp8_bf16 and store the <2 x i32> result with global_store_b64.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk8_fp8_bf16_vv(<8 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[8:9], v[0:3], v4
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[8:9], v[0:3], v4
+; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %src, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp8 <- bf16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s3 into v[2:5] and the scale to be encoded as the literal 0x42c80000.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk8_fp8_bf16_sl(<8 x bfloat> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %src, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf8 <- bf16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk8_bf8_bf16 and store the <2 x i32> result with global_store_b64.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk8_bf8_bf16_vv(<8 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[8:9], v[0:3], v4
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[8:9], v[0:3], v4
+; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %src, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf8 <- bf16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s3 into v[2:5] and the scale to be encoded as the literal 0x42c80000.
+; NOTE(review): the GISEL checks are identical to SDAG here; presumably GlobalISel falls back
+; to SelectionDAG for bfloat (RUN uses -global-isel-abort=2) - confirm.
+define amdgpu_ps void @test_scalef32_pk8_bf8_bf16_sl(<8 x bfloat> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %src, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp8 <- f16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk8_fp8_f16 and a global_store_b64; they differ only in which registers
+; hold the copied %out pointer and the result.
+define amdgpu_ps void @test_scalef32_pk8_fp8_f16_vv(<8 x half> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[8:9], v[0:3], v4
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v9, v6
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[6:7], v[0:3], v4
+; GFX1250-GISEL-NEXT: global_store_b64 v[8:9], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %src, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp8 <- f16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s3 into v[2:5] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk8_fp8_f16_sl(<8 x half> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %src, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf8 <- f16 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk8_bf8_f16 and a global_store_b64; they differ only in which registers
+; hold the copied %out pointer and the result.
+define amdgpu_ps void @test_scalef32_pk8_bf8_f16_vv(<8 x half> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[8:9], v[0:3], v4
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v9, v6
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[6:7], v[0:3], v4
+; GFX1250-GISEL-NEXT: global_store_b64 v[8:9], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %src, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf8 <- f16 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s3 into v[2:5] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk8_bf8_f16_sl(<8 x half> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[6:7], v[2:5], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %src, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf8 <- f32 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk8_bf8_f32 and a global_store_b64; they differ only in which registers
+; hold the copied %out pointer and the result.
+define amdgpu_ps void @test_scalef32_pk8_bf8_f32_vv(<8 x float> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[12:13], v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b64 v[10:11], v[12:13], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v10
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b64 v[12:13], v[10:11], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %src, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; bf8 <- f32 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk8_bf8_f32_sl(<8 x float> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %src, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp8 <- f32 with non-inreg %src and %scale. Both selectors are expected to emit a single
+; v_cvt_scalef32_pk8_fp8_f32 and a global_store_b64; they differ only in which registers
+; hold the copied %out pointer and the result.
+define amdgpu_ps void @test_scalef32_pk8_fp8_f32_vv(<8 x float> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[12:13], v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b64 v[10:11], v[12:13], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v10
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b64 v[12:13], v[10:11], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %src, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp8 <- f32 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9] (SDAG via v_dual_mov_b32, GISEL via v_mov_b64_e32) and the
+; scale to be encoded as the literal 0x42c80000.
+define amdgpu_ps void @test_scalef32_pk8_fp8_f32_sl(<8 x float> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %src, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; fp4 <- f32 with non-inreg %src and %scale. The intrinsic returns a single i32, so the
+; checks expect v_cvt_scalef32_pk8_fp4_f32 to write one register (v9) followed by a
+; global_store_b32.
+define amdgpu_ps void @test_scalef32_pk8_fp4_f32_vv(<8 x float> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v9
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f32 v9, v[0:7], v8
+; GFX1250-SDAG-NEXT: global_store_b32 v[10:11], v9, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v10
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f32 v9, v[0:7], v8
+; GFX1250-GISEL-NEXT: global_store_b32 v[12:13], v9, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %src, float %scale)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; fp4 <- f32 with an inreg %src and the constant scale 100.0. The checks expect %src to be
+; copied from s0..s7 into v[2:9], the scale to be encoded as the literal 0x42c80000, and the
+; single i32 result (v10) to be written with global_store_b32.
+define amdgpu_ps void @test_scalef32_pk8_fp4_f32_sl(<8 x float> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f32 v10, v[2:9], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v10, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f32 v10, v[2:9], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v10, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %src, float 100.0)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_pk8_fp4_f16_vv(<8 x half> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f16 v5, v[0:3], v4
+; GFX1250-SDAG-NEXT: global_store_b32 v[6:7], v5, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v9, v6
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f16 v5, v[0:3], v4
+; GFX1250-GISEL-NEXT: global_store_b32 v[8:9], v5, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %src, float %scale)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_pk8_fp4_f16_sl(<8 x half> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f16 v6, v[2:5], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f16 v6, v[2:5], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %src, float 100.0)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_pk8_fp4_bf16_vv(<8 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v5, v[0:3], v4
+; GFX1250-SDAG-NEXT: global_store_b32 v[6:7], v5, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v5, v[0:3], v4
+; GFX1250-GISEL-NEXT: global_store_b32 v[6:7], v5, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %src, float %scale)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_pk8_fp4_bf16_sl(<8 x bfloat> inreg %src, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v6, v[2:5], 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v6, v[2:5], 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %src, float 100.0)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx1250.ll
new file mode 100644
index 0000000..d33acf6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk.gfx1250.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
+
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.bf16(<8 x bfloat> %src, i32 %sr, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.bf16(<8 x bfloat> %src, i32 %sr, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f16(<8 x half> %src, i32 %sr, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f16(<8 x half> %src, i32 %sr, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f32(<8 x float> %src, i32 %sr, float %scale)
+declare <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f32(<8 x float> %src, i32 %sr, float %scale)
+declare i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f32(<8 x float> %src, i32 %sr, float %scale)
+declare i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f16(<8 x half> %src, i32 %sr, float %scale)
+declare i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.bf16(<8 x bfloat> %src, i32 %sr, float %scale)
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp8_bf16_vv(<8 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp8_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp8_bf16 v[8:9], v[0:3], v4, v5
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp8_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp8_bf16 v[8:9], v[0:3], v4, v5
+; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.bf16(<8 x bfloat> %src, i32 %sr, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp8_bf16_sl(<8 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp8_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp8_bf16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp8_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp8_bf16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.bf16(<8 x bfloat> %src, i32 %sr, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_bf8_bf16_vv(<8 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_bf8_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_bf8_bf16 v[8:9], v[0:3], v4, v5
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_bf8_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_bf8_bf16 v[8:9], v[0:3], v4, v5
+; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.bf16(<8 x bfloat> %src, i32 %sr, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_bf8_bf16_sl(<8 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_bf8_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_bf8_bf16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_bf8_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_bf8_bf16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.bf16(<8 x bfloat> %src, i32 %sr, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp8_f16_vv(<8 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp8_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp8_f16 v[8:9], v[0:3], v4, v5
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp8_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp8_f16 v[8:9], v[0:3], v4, v5
+; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f16(<8 x half> %src, i32 %sr, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp8_f16_sl(<8 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp8_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp8_f16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp8_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp8_f16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f16(<8 x half> %src, i32 %sr, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_bf8_f16_vv(<8 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_bf8_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_bf8_f16 v[8:9], v[0:3], v4, v5
+; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_bf8_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_bf8_f16 v[8:9], v[0:3], v4, v5
+; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f16(<8 x half> %src, i32 %sr, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_bf8_f16_sl(<8 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_bf8_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_bf8_f16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_bf8_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_bf8_f16 v[6:7], v[2:5], s4, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f16(<8 x half> %src, i32 %sr, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_bf8_f32_vv(<8 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_bf8_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_bf8_f32 v[12:13], v[0:7], v8, v9
+; GFX1250-SDAG-NEXT: global_store_b64 v[10:11], v[12:13], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_bf8_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_bf8_f32 v[12:13], v[0:7], v8, v9
+; GFX1250-GISEL-NEXT: global_store_b64 v[10:11], v[12:13], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f32(<8 x float> %src, i32 %sr, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_bf8_f32_sl(<8 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_bf8_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[2:9], s8, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_bf8_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[2:9], s8, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f32(<8 x float> %src, i32 %sr, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp8_f32_vv(<8 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp8_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp8_f32 v[12:13], v[0:7], v8, v9
+; GFX1250-SDAG-NEXT: global_store_b64 v[10:11], v[12:13], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp8_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp8_f32 v[12:13], v[0:7], v8, v9
+; GFX1250-GISEL-NEXT: global_store_b64 v[10:11], v[12:13], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f32(<8 x float> %src, i32 %sr, float %scale)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp8_f32_sl(<8 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp8_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[2:9], s8, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp8_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[2:9], s8, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[10:11], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f32(<8 x float> %src, i32 %sr, float 100.0)
+ store <2 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp4_f32_vv(<8 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp4_f32_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp4_f32 v12, v[0:7], v8, v9
+; GFX1250-SDAG-NEXT: global_store_b32 v[10:11], v12, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp4_f32_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp4_f32 v12, v[0:7], v8, v9
+; GFX1250-GISEL-NEXT: global_store_b32 v[10:11], v12, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f32(<8 x float> %src, i32 %sr, float %scale)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp4_f32_sl(<8 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp4_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[2:9], s8, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v10, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp4_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[2:9], s8, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v10, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f32(<8 x float> %src, i32 %sr, float 100.0)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp4_f16_vv(<8 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp4_f16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp4_f16 v8, v[0:3], v4, v5
+; GFX1250-SDAG-NEXT: global_store_b32 v[6:7], v8, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp4_f16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp4_f16 v8, v[0:3], v4, v5
+; GFX1250-GISEL-NEXT: global_store_b32 v[6:7], v8, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f16(<8 x half> %src, i32 %sr, float %scale)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp4_f16_sl(<8 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp4_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp4_f16 v6, v[2:5], s4, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp4_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp4_f16 v6, v[2:5], s4, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f16(<8 x half> %src, i32 %sr, float 100.0)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp4_bf16_vv(<8 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp4_bf16_vv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp4_bf16 v8, v[0:3], v4, v5
+; GFX1250-SDAG-NEXT: global_store_b32 v[6:7], v8, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp4_bf16_vv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp4_bf16 v8, v[0:3], v4, v5
+; GFX1250-GISEL-NEXT: global_store_b32 v[6:7], v8, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.bf16(<8 x bfloat> %src, i32 %sr, float %scale)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk8_fp4_bf16_sl(<8 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk8_fp4_bf16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk8_fp4_bf16 v6, v[2:5], s4, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk8_fp4_bf16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk8_fp4_bf16 v6, v[2:5], s4, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v6, off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.bf16(<8 x bfloat> %src, i32 %sr, float 100.0)
+ store i32 %cvt, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk16.ll
new file mode 100644
index 0000000..c439518
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.pk16.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.bf16(<16 x bfloat> %src, i32 %sr, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f16(<16 x half> %src, i32 %sr, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f32(<16 x float> %src, i32 %sr, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.bf16(<16 x bfloat> %src, i32 %sr, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f16(<16 x half> %src, i32 %sr, float %scale)
+declare <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f32(<16 x float> %src, i32 %sr, float %scale)
+
+define amdgpu_ps void @test_scalef32_sr_pk16_bf6_bf16_vv(<16 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_bf6_bf16_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_bf6_bf16 v[12:14], v[0:7], v8, v9
+; GFX1250-NEXT: global_store_b96 v[10:11], v[12:14], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.bf16(<16 x bfloat> %src, i32 %sr, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_bf6_bf16_sl(<16 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_bf6_bf16_sl:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[2:9], s8, 0x42c80000
+; GFX1250-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.bf16(<16 x bfloat> %src, i32 %sr, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_bf6_f16_vv(<16 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_bf6_f16_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_bf6_f16 v[12:14], v[0:7], v8, v9
+; GFX1250-NEXT: global_store_b96 v[10:11], v[12:14], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f16(<16 x half> %src, i32 %sr, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_bf6_f16_sl(<16 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk16_bf6_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[2:9], s8, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk16_bf6_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[2:9], s8, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f16(<16 x half> %src, i32 %sr, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_fp6_bf16_vv(<16 x bfloat> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_fp6_bf16_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_fp6_bf16 v[12:14], v[0:7], v8, v9
+; GFX1250-NEXT: global_store_b96 v[10:11], v[12:14], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.bf16(<16 x bfloat> %src, i32 %sr, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_fp6_bf16_sl(<16 x bfloat> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_fp6_bf16_sl:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[2:9], s8, 0x42c80000
+; GFX1250-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.bf16(<16 x bfloat> %src, i32 %sr, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_fp6_f16_vv(<16 x half> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_fp6_f16_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_fp6_f16 v[12:14], v[0:7], v8, v9
+; GFX1250-NEXT: global_store_b96 v[10:11], v[12:14], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f16(<16 x half> %src, i32 %sr, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_fp6_f16_sl(<16 x half> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk16_fp6_f16_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[2:9], s8, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk16_fp6_f16_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[2:9], s8, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[10:12], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f16(<16 x half> %src, i32 %sr, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_bf6_f32_vv(<16 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_bf6_f32_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_bf6_f32 v[20:22], v[0:15], v16, v17
+; GFX1250-NEXT: global_store_b96 v[18:19], v[20:22], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f32(<16 x float> %src, i32 %sr, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_bf6_f32_sl(<16 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk16_bf6_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v11, s9
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v13, s11
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v15, s13
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v17, s15
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk16_bf6_f32 v[18:20], v[2:17], s16, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk16_bf6_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk16_bf6_f32 v[18:20], v[2:17], s16, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f32(<16 x float> %src, i32 %sr, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_fp6_f32_vv(<16 x float> %src, i32 %sr, float %scale, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_scalef32_sr_pk16_fp6_f32_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: v_cvt_scalef32_sr_pk16_fp6_f32 v[20:22], v[0:15], v16, v17
+; GFX1250-NEXT: global_store_b96 v[18:19], v[20:22], off
+; GFX1250-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f32(<16 x float> %src, i32 %sr, float %scale)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+define amdgpu_ps void @test_scalef32_sr_pk16_fp6_f32_sl(<16 x float> inreg %src, i32 inreg %sr, ptr addrspace(1) %out) {
+; GFX1250-SDAG-LABEL: test_scalef32_sr_pk16_fp6_f32_sl:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v10, s8 :: v_dual_mov_b32 v11, s9
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v12, s10 :: v_dual_mov_b32 v13, s11
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v14, s12 :: v_dual_mov_b32 v15, s13
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v16, s14 :: v_dual_mov_b32 v17, s15
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_sr_pk16_fp6_f32 v[18:20], v[2:17], s16, 0x42c80000
+; GFX1250-SDAG-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: test_scalef32_sr_pk16_fp6_f32_sl:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[16:17], s[14:15]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[14:15], s[12:13]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[12:13], s[10:11]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[8:9]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_cvt_scalef32_sr_pk16_fp6_f32 v[18:20], v[2:17], s16, 0x42c80000
+; GFX1250-GISEL-NEXT: global_store_b96 v[0:1], v[18:20], off
+; GFX1250-GISEL-NEXT: s_endpgm
+ %cvt = tail call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f32(<16 x float> %src, i32 %sr, float 100.0)
+ store <3 x i32> %cvt, ptr addrspace(1) %out, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.pk.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.pk.ll
new file mode 100644
index 0000000..d2f96c4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.pk.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+
+declare <2 x i32> @llvm.amdgcn.perm.pk16.b4.u4(i32, i32, <2 x i32>)
+declare <3 x i32> @llvm.amdgcn.perm.pk16.b6.u4(i32, i64, <2 x i32>)
+declare <4 x i32> @llvm.amdgcn.perm.pk16.b8.u4(i64, i64, <2 x i32>)
+
+define void @test_perm_pk16_b4_u4(i32 %a, i32 %b, <2 x i32> %c, ptr %out) {
+; GFX1250-LABEL: test_perm_pk16_b4_u4:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_perm_pk16_b4_u4 v[0:1], v0, v1, v[2:3]
+; GFX1250-NEXT: flat_store_b64 v[4:5], v[0:1] scope:SCOPE_SE
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ %ret = tail call <2 x i32> @llvm.amdgcn.perm.pk16.b4.u4(i32 %a, i32 %b, <2 x i32> %c)
+ store <2 x i32> %ret, ptr %out, align 8
+ ret void
+}
+
+define void @test_perm_pk16_b6_u4(i32 %a, i64 %b, <2 x i32> %c, ptr %out) {
+; GFX1250-SDAG-LABEL: test_perm_pk16_b6_u4:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v9, v4
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v3, v2
+; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_mov_b32 v6, v5
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-NEXT: v_perm_pk16_b6_u4 v[0:2], v0, v[2:3], v[8:9]
+; GFX1250-SDAG-NEXT: flat_store_b96 v[6:7], v[0:2] scope:SCOPE_SE
+; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0
+; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-GISEL-LABEL: test_perm_pk16_b6_u4:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v9, v2
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-GISEL-NEXT: v_perm_pk16_b6_u4 v[0:2], v0, v[8:9], v[2:3]
+; GFX1250-GISEL-NEXT: flat_store_b96 v[4:5], v[0:2] scope:SCOPE_SE
+; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0
+; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %ret = tail call <3 x i32> @llvm.amdgcn.perm.pk16.b6.u4(i32 %a, i64 %b, <2 x i32> %c)
+ store <3 x i32> %ret, ptr %out, align 16
+ ret void
+}
+
+define void @test_perm_pk16_b8_u4(i64 %a, i64 %b, <2 x i32> %c, ptr %out) {
+; GFX1250-LABEL: test_perm_pk16_b8_u4:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_perm_pk16_b8_u4 v[0:3], v[0:1], v[2:3], v[4:5]
+; GFX1250-NEXT: flat_store_b128 v[6:7], v[0:3] scope:SCOPE_SE
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+ %ret = tail call <4 x i32> @llvm.amdgcn.perm.pk16.b8.u4(i64 %a, i64 %b, <2 x i32> %c)
+ store <4 x i32> %ret, ptr %out, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll
new file mode 100644
index 0000000..4f7bbf8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll
@@ -0,0 +1,416 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+
+define amdgpu_kernel void @v_permlane_bcast_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+; GFX1250-LABEL: v_permlane_bcast_b32_vss:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, s3, s4
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_bcast_b32_vii(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_bcast_b32_vii:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, 1, 2
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 1, i32 2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_bcast_b32_vll(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_bcast_b32_vll:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_movk_i32 s2, 0x64
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, s2, 0x66
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 100, i32 102)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_bcast_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-SDAG-LABEL: v_permlane_bcast_b32_vvv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-SDAG-NEXT: v_permlane_bcast_b32 v1, v1, s3, s2
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: v_permlane_bcast_b32_vvv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_permlane_bcast_b32 v0, v0, s3, s4
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %tidx, i32 %tidy)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_down_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+; GFX1250-LABEL: v_permlane_down_b32_vss:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_down_b32 v0, v0, s3, s4
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_down_b32_vii(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_down_b32_vii:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_down_b32 v0, v0, 1, 2
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 1, i32 2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_down_b32_vll(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_down_b32_vll:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_movk_i32 s2, 0x64
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_permlane_down_b32 v0, v0, s2, 0x66
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 100, i32 102)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_down_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-SDAG-LABEL: v_permlane_down_b32_vvv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-SDAG-NEXT: v_permlane_down_b32 v1, v1, s3, s2
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: v_permlane_down_b32_vvv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_permlane_down_b32 v0, v0, s3, s4
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %tidx, i32 %tidy)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_up_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+; GFX1250-LABEL: v_permlane_up_b32_vss:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_up_b32 v0, v0, s3, s4
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_up_b32_vii(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_up_b32_vii:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_up_b32 v0, v0, 1, 2
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 1, i32 2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_up_b32_vll(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_up_b32_vll:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_movk_i32 s2, 0x64
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_permlane_up_b32 v0, v0, s2, 0x66
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 100, i32 102)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_up_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-SDAG-LABEL: v_permlane_up_b32_vvv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-SDAG-NEXT: v_permlane_up_b32 v1, v1, s3, s2
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: v_permlane_up_b32_vvv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_permlane_up_b32 v0, v0, s3, s4
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %tidx, i32 %tidy)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_xor_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
+; GFX1250-LABEL: v_permlane_xor_b32_vss:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, s3, s4
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_xor_b32_vii(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_xor_b32_vii:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, 1, 2
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 1, i32 2)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_xor_b32_vll(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_xor_b32_vll:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_movk_i32 s2, 0x64
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, s2, 0x66
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 100, i32 102)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_xor_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-SDAG-LABEL: v_permlane_xor_b32_vvv:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0
+; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250-SDAG-NEXT: v_permlane_xor_b32 v1, v1, s3, s2
+; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-SDAG-NEXT: s_endpgm
+;
+; GFX1250-GISEL-LABEL: v_permlane_xor_b32_vvv:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1
+; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-NEXT: v_permlane_xor_b32 v0, v0, s3, s4
+; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-GISEL-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %tidx, i32 %tidy)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_idx_gen_b32_vs(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
+; GFX1250-LABEL: v_permlane_idx_gen_b32_vs:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, s3
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 %src1)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_idx_gen_b32_vi(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_idx_gen_b32_vi:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, 1
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 1)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_idx_gen_b32_vl(ptr addrspace(1) %out, i32 %src0) {
+; GFX1250-LABEL: v_permlane_idx_gen_b32_vl:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, 0x64
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 100)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @v_permlane_idx_gen_b32_vv(ptr addrspace(1) %out) {
+; GFX1250-LABEL: v_permlane_idx_gen_b32_vv:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_bfe_u32 v1, v0, 10, 10
+; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-NEXT: v_readfirstlane_b32 s2, v1
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, s2
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+ %tidx = call i32 @llvm.amdgcn.workitem.id.x()
+ %tidy = call i32 @llvm.amdgcn.workitem.id.y()
+ %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %tidx, i32 %tidy)
+ store i32 %v, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 4491c4b..8c8dd83 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -232,32 +232,38 @@ define amdgpu_kernel void @constant_load_v3i16(ptr addrspace(1) %out, ptr addrsp
;
; EG-LABEL: constant_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T5.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T2.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT: LSHL T5.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T5.Y, 0.0,
+; EG-NEXT: MOV * T5.Z, 0.0,
+; EG-NEXT: LSHR T8.X, T0.W, literal.x,
+; EG-NEXT: LSHL T0.W, T7.X, literal.y,
+; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T2.Y, 0.0,
-; EG-NEXT: MOV * T2.Z, 0.0,
-; EG-NEXT: LSHR T0.X, T0.W, literal.x,
-; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX12-LABEL: constant_load_v3i16:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
index b39b38a..5c4bc95 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll
@@ -9832,24 +9832,50 @@ define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_zextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
+; EG-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T7.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T0.W, T7.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; GFX12-LABEL: constant_zextload_v4i8_to_v4i16:
; GFX12: ; %bb.0:
@@ -9951,23 +9977,56 @@ define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_sextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
+; EG-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.W, T4.X, literal.x,
-; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
-; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; GFX12-LABEL: constant_sextload_v4i8_to_v4i16:
; GFX12: ; %bb.0:
@@ -10088,27 +10147,80 @@ define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_zextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 61, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T11.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T11.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T11.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T11.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T11.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; EG-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T6.X, T5.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; GFX12-LABEL: constant_zextload_v8i8_to_v8i16:
; GFX12: ; %bb.0:
@@ -10255,28 +10367,93 @@ define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(ptr addrspace(1) %out
;
; EG-LABEL: constant_sextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.Y, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; GFX12-LABEL: constant_sextload_v8i8_to_v8i16:
; GFX12: ; %bb.0:
@@ -10472,37 +10649,146 @@ define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_zextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 103, @12, KC0[], KC1[]
+; EG-NEXT: ALU 20, @116, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT T0.W, T19.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T19.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T19.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT: BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT: BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44)
-; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T19.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 116:
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_zextload_v16i8_to_v16i16:
; GFX12: ; %bb.0:
@@ -10753,38 +11039,173 @@ define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_sextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 104, @12, KC0[], KC1[]
+; EG-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
-; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
-; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
-; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T10.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 117:
+; EG-NEXT: OR_INT * T19.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_sextload_v16i8_to_v16i16:
; GFX12: ; %bb.0:
@@ -11132,58 +11553,276 @@ define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_zextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 37, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 103, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @120, KC0[], KC1[]
+; EG-NEXT: ALU 41, @225, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: AND_INT T0.W, T37.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T37.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T37.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT: BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT: BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT: BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT: BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T16.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT: BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT: AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T12.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT: AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 120:
+; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T32.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.X, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T28.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T24.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T20.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: ALU clause starting at 225:
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: BFE_UINT * T0.W, T35.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_zextload_v32i8_to_v32i16:
; GFX12: ; %bb.0:
@@ -11642,60 +12281,331 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(ptr addrspace(1) %o
;
; EG-LABEL: constant_sextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 104, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @121, KC0[], KC1[]
+; EG-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
-; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T14.X, PV.W, literal.x,
-; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
-; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
-; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
-; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
-; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T11.X, PS, literal.x,
-; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
-; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T12.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 121:
+; EG-NEXT: OR_INT * T37.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, T32.X,
+; EG-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T28.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 226:
+; EG-NEXT: AND_INT T1.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, T0.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, T24.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: LSHL T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; GFX12-LABEL: constant_sextload_v32i8_to_v32i16:
; GFX12: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 3753737..ff5b9aa 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -263,63 +263,74 @@ define amdgpu_kernel void @global_load_v3i16(ptr addrspace(1) %out, ptr addrspac
;
; EG-LABEL: global_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 14, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T3.X, 0
-; EG-NEXT: MEM_RAT MSKOR T2.XW, T0.X
+; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
+; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT: ALU clause starting at 10:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T5.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 13:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T2.X, T2.W, PV.W,
-; EG-NEXT: LSHL * T2.W, literal.x, PV.W,
+; EG-NEXT: LSHL T5.X, T2.W, PV.W,
+; EG-NEXT: LSHL * T5.W, literal.x, PV.W,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T5.Y, 0.0,
+; EG-NEXT: MOV * T5.Z, 0.0,
+; EG-NEXT: LSHR T8.X, T0.W, literal.x,
+; EG-NEXT: LSHL T0.W, T7.X, literal.y,
+; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T2.Y, 0.0,
-; EG-NEXT: MOV * T2.Z, 0.0,
-; EG-NEXT: LSHR T0.X, T0.W, literal.x,
-; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v3i16:
; CM: ; %bb.0: ; %entry
-; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 1 @6
-; CM-NEXT: ALU 15, @11, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT MSKOR T2.XW, T3.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T5.XW, T8.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
; CM-NEXT: CF_END
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_16 T1.X, T0.X, 0, #1
-; CM-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; CM-NEXT: ALU clause starting at 10:
-; CM-NEXT: MOV * T0.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 11:
+; CM-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1
+; CM-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1
+; CM-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T5.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 13:
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT: AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT: AND_INT T0.Z, T5.X, literal.x,
; CM-NEXT: LSHL * T1.W, PV.W, literal.y,
; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
-; CM-NEXT: LSHL T2.X, PV.Z, PV.W,
-; CM-NEXT: LSHL * T2.W, literal.x, PV.W,
+; CM-NEXT: LSHL T5.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T5.W, literal.x, PV.W,
; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; CM-NEXT: MOV T2.Y, 0.0,
-; CM-NEXT: MOV * T2.Z, 0.0,
-; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
+; CM-NEXT: MOV T5.Y, 0.0,
+; CM-NEXT: MOV * T5.Z, 0.0,
+; CM-NEXT: LSHL T0.Z, T7.X, literal.x,
+; CM-NEXT: AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T6.X, PV.Z, PV.W,
+; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: LSHR * T3.X, T0.W, literal.x,
+; CM-NEXT: LSHR * T8.X, T0.W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%ld = load <3 x i16>, ptr addrspace(1) %in
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
index 5bc02c4..6a39df9 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i8.ll
@@ -9887,46 +9887,97 @@ define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_zextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1
+; EG-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T7.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T0.W, T7.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: AND_INT T4.X, T4.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; CM-LABEL: global_zextload_v4i8_to_v4i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
+; CM-NEXT: ALU 31, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T7.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; CM-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T4.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: MOV * T7.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: AND_INT T0.Z, T7.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T7.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T4.Y, T4.X, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T0.W, T7.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T7.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: AND_INT * T4.X, T4.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T8.Y, PV.Z, PV.W,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.Y,
+; CM-NEXT: MOV * T8.X, T4.X,
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = zext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(1) %out
@@ -10017,43 +10068,109 @@ define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_sextload_v4i8_to_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1
+; EG-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XY, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T4.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.W, T4.X, literal.x,
-; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
-; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: MOV * T7.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T7.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T8.Y, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.Y,
+; EG-NEXT: MOV * T8.X, T4.X,
;
; CM-LABEL: global_sextload_v4i8_to_v4i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T4.X
+; CM-NEXT: ALU 37, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T7.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1
+; CM-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T4.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
-; CM-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x,
-; CM-NEXT: LSHR * T0.W, T4.X, literal.x,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: MOV * T7.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T7.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T7.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T7.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T8.Y, PV.Z, PV.W,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.Y,
+; CM-NEXT: MOV * T8.X, T4.X,
%load = load <4 x i8>, ptr addrspace(1) %in
%ext = sext <4 x i8> %load to <4 x i16>
store <4 x i16> %ext, ptr addrspace(1) %out
@@ -10158,52 +10275,156 @@ define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_zextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 61, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: AND_INT T0.W, T11.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T11.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T11.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; EG-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T6.X, T5.X, literal.x,
-; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 2(2.802597e-45)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T11.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T11.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; CM-LABEL: global_zextload_v8i8_to_v8i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
+; CM-NEXT: ALU 60, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T11.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; CM-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T5.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: MOV * T11.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: AND_INT T0.Z, T11.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T11.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T6.W, T5.Y, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T11.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T11.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT T6.Y, T5.X, literal.x, T0.W,
-; CM-NEXT: AND_INT * T6.Z, T5.Y, literal.y,
-; CM-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT: AND_INT * T6.X, T5.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T12.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T11.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T11.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T0.W, T11.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T12.W, PV.Z, PV.W,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T12.X, T8.X,
+; CM-NEXT: MOV * T12.Z, T4.X, BS:VEC_120/SCL_212
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = zext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(1) %out
@@ -10344,53 +10565,183 @@ define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(ptr addrspace(1) %out,
;
; EG-LABEL: global_sextload_v8i8_to_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T5.X, 1
+; EG-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T11.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; EG-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T5.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: MOV * T11.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.Y, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T6.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T5.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T12.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT * T6.Y, PS, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T11.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T12.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T12.X, T8.X,
+; EG-NEXT: MOV * T12.Z, T4.X,
;
; CM-LABEL: global_sextload_v8i8_to_v8i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T5.X
+; CM-NEXT: ALU 74, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T11.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1
+; CM-NEXT: VTX_READ_64 T11.XY, T11.X, 0, #1
; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T5.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
-; CM-NEXT: BFE_INT * T6.Z, T5.Y, 0.0, literal.x,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: MOV * T11.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: BFE_INT * T0.W, T11.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T6.X, T5.X, 0.0, literal.x,
-; CM-NEXT: LSHR * T0.W, T5.Y, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T11.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T0.Z, T5.X, literal.x,
-; CM-NEXT: BFE_INT * T6.W, PV.W, 0.0, literal.x,
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T6.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
+; CM-NEXT: LSHR * T0.W, T11.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T11.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T12.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T4.X,
+; CM-NEXT: BFE_INT * T0.W, T11.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T11.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T12.W, PV.Z, PV.W,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T12.X, T8.X,
+; CM-NEXT: MOV * T12.Z, T4.X, BS:VEC_120/SCL_212
%load = load <8 x i8>, ptr addrspace(1) %in
%ext = sext <8 x i8> %load to <8 x i16>
store <8 x i16> %ext, ptr addrspace(1) %out
@@ -10547,71 +10898,287 @@ define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(ptr addrspace(1) %out
;
; EG-LABEL: global_zextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 103, @12, KC0[], KC1[]
+; EG-NEXT: ALU 20, @116, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT T0.W, T19.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T19.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T8.W, T7.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T19.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T8.Y, T7.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T8.Z, T7.Y, literal.y,
-; EG-NEXT: BFE_UINT * T9.W, T7.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T8.X, T7.X, literal.x,
-; EG-NEXT: BFE_UINT T9.Y, T7.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT * T9.Z, T7.W, literal.x,
-; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; EG-NEXT: AND_INT T9.X, T7.Z, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 255(3.573311e-43), 16(2.242078e-44)
-; EG-NEXT: LSHR * T10.X, PV.W, literal.x,
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T19.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T19.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T19.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 116:
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_zextload_v16i8_to_v16i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 101, @12, KC0[], KC1[]
+; CM-NEXT: ALU 20, @114, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T22.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T21.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T7.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T19.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: AND_INT T0.Z, T19.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T19.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T8.W, T7.W, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T19.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T19.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT T8.Y, T7.Z, literal.x, T0.W,
-; CM-NEXT: AND_INT T8.Z, T7.W, literal.y,
-; CM-NEXT: BFE_UINT * T9.W, T7.Y, literal.x, T0.W,
-; CM-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT: AND_INT T8.X, T7.Z, literal.x,
-; CM-NEXT: BFE_UINT T9.Y, T7.X, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T20.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T12.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T19.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T19.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T13.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T19.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T19.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T20.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T19.Z, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T19.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T19.Z, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T19.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T19.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T19.W, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T19.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: AND_INT * T0.Z, PV.Y, literal.x,
+; CM-NEXT: -65536(nan), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 114:
+; CM-NEXT: BFE_UINT * T0.W, T19.W, literal.x, T0.W,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T10.X, PV.W, literal.x,
-; CM-NEXT: AND_INT * T9.Z, T7.Y, literal.y,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
-; CM-NEXT: AND_INT * T9.X, T7.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T0.W, T0.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, PV.Y, literal.y,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.w,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16711680(2.341805e-38), 16(2.242078e-44)
+; CM-NEXT: LSHR T22.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T19.W, PV.Y, PV.Z,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T20.X, T16.X,
+; CM-NEXT: MOV * T20.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T19.X, T8.X,
+; CM-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = zext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(1) %out
@@ -10844,72 +11411,343 @@ define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(ptr addrspace(1) %out
;
; EG-LABEL: global_sextload_v16i8_to_v16i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1
+; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 104, @12, KC0[], KC1[]
+; EG-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T22.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T21.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x,
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T19.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.Y, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T7.W, literal.x,
-; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x,
-; EG-NEXT: LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y,
-; EG-NEXT: LSHR T1.Z, T7.Z, literal.y,
-; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T10.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T20.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T19.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 117:
+; EG-NEXT: OR_INT * T19.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T19.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR T0.W, T19.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 16(2.242078e-44)
+; EG-NEXT: LSHR T21.X, PS, literal.x,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.y,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T22.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T19.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T20.X, T16.X,
+; EG-NEXT: MOV * T20.Z, T12.X,
+; EG-NEXT: MOV T19.X, T8.X,
+; EG-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_sextload_v16i8_to_v16i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 0 @6
-; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X
+; CM-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 104, @12, KC0[], KC1[]
+; CM-NEXT: ALU 46, @117, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T22.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T21.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 6:
-; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
-; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: MOV * T7.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 9:
-; CM-NEXT: BFE_INT * T8.Z, T7.W, 0.0, literal.x,
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 0, #1
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T19.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: BFE_INT * T0.W, T19.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T8.X, T7.Z, 0.0, literal.x,
-; CM-NEXT: LSHR T0.Y, T7.Y, literal.x,
-; CM-NEXT: BFE_INT T9.Z, T7.Y, 0.0, literal.x,
-; CM-NEXT: LSHR * T0.W, T7.W, literal.x,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x,
-; CM-NEXT: LSHR T1.Y, T7.Z, literal.x,
-; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y,
-; CM-NEXT: BFE_INT * T8.W, PV.W, 0.0, literal.x,
-; CM-NEXT: 8(1.121039e-44), 16(2.242078e-44)
-; CM-NEXT: LSHR T10.X, PV.Z, literal.x,
-; CM-NEXT: BFE_INT T8.Y, PV.Y, 0.0, literal.y,
-; CM-NEXT: LSHR T0.Z, T7.X, literal.y,
-; CM-NEXT: BFE_INT * T9.W, T0.Y, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
+; CM-NEXT: LSHR * T0.W, T19.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T20.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T12.X,
+; CM-NEXT: BFE_INT * T0.W, T19.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, T13.X,
+; CM-NEXT: LSHR * T0.W, T19.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T20.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, T8.X,
+; CM-NEXT: BFE_INT * T0.W, T19.Z, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
+; CM-NEXT: LSHR * T0.W, T19.Z, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.Z, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: ALU clause starting at 117:
+; CM-NEXT: OR_INT * T19.Y, T0.Z, T0.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T4.X,
+; CM-NEXT: BFE_INT * T0.W, T19.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T19.W, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, PV.Y, literal.y,
+; CM-NEXT: LSHL T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T22.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T19.W, PV.Y, PV.Z,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T20.X, T16.X,
+; CM-NEXT: MOV * T20.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T19.X, T8.X,
+; CM-NEXT: MOV * T19.Z, T4.X, BS:VEC_120/SCL_212
%load = load <16 x i8>, ptr addrspace(1) %in
%ext = sext <16 x i8> %load to <16 x i16>
store <16 x i16> %ext, ptr addrspace(1) %out
@@ -11181,115 +12019,543 @@ define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(ptr addrspace(1) %out
;
; EG-LABEL: global_zextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 37, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 103, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @120, KC0[], KC1[]
+; EG-NEXT: ALU 41, @225, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: AND_INT T0.W, T37.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 255(3.573311e-43), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T0.W, T37.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT * T13.W, T11.Y, literal.x, PV.W,
+; EG-NEXT: BFE_UINT T1.W, T37.X, literal.x, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T13.Y, T11.X, literal.x, T0.W,
-; EG-NEXT: AND_INT T13.Z, T11.Y, literal.y,
-; EG-NEXT: BFE_UINT * T14.W, T11.W, literal.x, T0.W,
-; EG-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; EG-NEXT: AND_INT T13.X, T11.X, literal.x,
-; EG-NEXT: BFE_UINT T14.Y, T11.Z, literal.y, T0.W,
-; EG-NEXT: LSHR * T11.X, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: AND_INT T14.Z, T11.W, literal.x,
-; EG-NEXT: BFE_UINT * T15.W, T12.Y, literal.y, T0.W,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; EG-NEXT: AND_INT T14.X, T11.Z, literal.x,
-; EG-NEXT: BFE_UINT T15.Y, T12.X, literal.y, T0.W,
-; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; EG-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T12.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Y, literal.x, T0.W,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T16.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T15.Z, T12.Y, literal.y,
-; EG-NEXT: BFE_UINT T17.W, T12.W, literal.z, T0.W,
-; EG-NEXT: AND_INT * T15.X, T12.X, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Y, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_UINT T17.Y, T12.Z, literal.x, T0.W,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T12.X, PV.W, literal.x,
-; EG-NEXT: AND_INT T17.Z, T12.W, literal.y,
-; EG-NEXT: AND_INT * T17.X, T12.Z, literal.y,
-; EG-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T8.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.Y, PV.W, PS,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T37.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 120:
+; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T32.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.X, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.X, literal.x, T0.W, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T28.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Y, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Y, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T24.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.Z, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHL * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: BFE_UINT * T1.W, T35.Z, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, PV.W, T1.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T1.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV * T0.Y, T20.X,
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T2.W, T35.W, literal.y,
+; EG-NEXT: -65536(nan), 255(3.573311e-43)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: ALU clause starting at 225:
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: LSHL * T1.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; EG-NEXT: OR_INT * T1.W, PV.W, PS,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: BFE_UINT * T0.W, T35.W, literal.x, T0.W,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR * T18.X, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16711680(2.341805e-38), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_zextload_v32i8_to_v32i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 1 @8
-; CM-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T12.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T18.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T14, T16.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T15.X
+; CM-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 101, @16, KC0[], KC1[]
+; CM-NEXT: ALU 101, @118, KC0[], KC1[]
+; CM-NEXT: ALU 40, @220, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T42.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T41.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T40.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T39.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 8:
-; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1
-; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1
-; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T11.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T35.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: AND_INT T0.Z, T37.X, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 255(3.573311e-43), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T0.W, T37.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
; CM-NEXT: MOV * T0.W, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT * T13.W, T11.W, literal.x, PV.W,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T37.X, literal.y, PV.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.X, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_UINT T13.Y, T11.Z, literal.x, T0.W,
-; CM-NEXT: AND_INT T13.Z, T11.W, literal.y,
-; CM-NEXT: BFE_UINT * T14.W, T11.Y, literal.x, T0.W,
-; CM-NEXT: 8(1.121039e-44), 255(3.573311e-43)
-; CM-NEXT: AND_INT T13.X, T11.Z, literal.x,
-; CM-NEXT: BFE_UINT T14.Y, T11.X, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T15.X, PV.W, literal.x,
-; CM-NEXT: AND_INT T14.Z, T11.Y, literal.y,
-; CM-NEXT: BFE_UINT * T11.W, T12.W, literal.z, T0.W,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T35.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T12.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T37.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T37.Y, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: AND_INT T14.X, T11.X, literal.x,
-; CM-NEXT: BFE_UINT T11.Y, T12.Z, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
-; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T16.X, PV.W, literal.x,
-; CM-NEXT: AND_INT T11.Z, T12.W, literal.y,
-; CM-NEXT: BFE_UINT * T17.W, T12.Y, literal.z, T0.W,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T13.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T37.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.Y, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: AND_INT T11.X, T12.Z, literal.x,
-; CM-NEXT: BFE_UINT T17.Y, T12.X, literal.y, T0.W,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT: 255(3.573311e-43), 8(1.121039e-44)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T8.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T37.Z, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T37.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T9.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T37.Z, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T37.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T4.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T37.W, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T37.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T5.X,
+; CM-NEXT: AND_INT * T0.Z, PV.Y, literal.x,
+; CM-NEXT: -65536(nan), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 118:
+; CM-NEXT: BFE_UINT * T1.W, T37.W, literal.x, T0.W,
; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T18.X, PV.W, literal.x,
-; CM-NEXT: AND_INT * T17.Z, T12.Y, literal.y,
-; CM-NEXT: 2(2.802597e-45), 255(3.573311e-43)
-; CM-NEXT: AND_INT * T17.X, T12.X, literal.x,
-; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
-; CM-NEXT: LSHR * T12.X, KC0[2].Y, literal.x,
+; CM-NEXT: OR_INT * T1.W, T0.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T37.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T37.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T32.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.X, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T32.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T36.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T32.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T33.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T36.X, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T33.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T36.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T38.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T33.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T28.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.Y, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T28.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T36.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T28.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T29.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T36.Y, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T29.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T36.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T38.W, PV.Z, PV.W,
+; CM-NEXT: MOV T29.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T24.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.Z, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T24.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHL * T1.W, T36.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T24.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T25.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T1.W, T36.Z, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T25.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T1.W, T36.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T36.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T25.X, PV.Y,
+; CM-NEXT: MOV * T0.Y, T20.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, T36.W, literal.y,
+; CM-NEXT: -65536(nan), 255(3.573311e-43)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T20.X, PV.W,
+; CM-NEXT: ALU clause starting at 220:
+; CM-NEXT: MOV T0.Y, T20.X,
+; CM-NEXT: LSHL * T1.W, T36.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T1.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16711680(2.341805e-38)
+; CM-NEXT: OR_INT * T1.W, PV.Z, PV.W,
+; CM-NEXT: MOV T20.X, PV.W,
+; CM-NEXT: MOV * T0.Y, T21.X,
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: BFE_UINT * T0.W, T36.W, literal.y, T0.W,
+; CM-NEXT: -65536(nan), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T39.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
+; CM-NEXT: LSHR T40.X, PV.W, literal.x,
+; CM-NEXT: LSHR * T0.W, T36.W, literal.y,
+; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: LSHR T41.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, T0.Y, literal.y,
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.w,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16711680(2.341805e-38), 16(2.242078e-44)
+; CM-NEXT: LSHR T42.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T36.W, PV.Y, PV.Z,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T35.X, T16.X,
+; CM-NEXT: MOV * T35.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T37.X, T8.X,
+; CM-NEXT: MOV * T37.Z, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T38.X, T32.X,
+; CM-NEXT: MOV * T38.Z, T28.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T36.X, T24.X,
+; CM-NEXT: MOV * T36.Z, T20.X, BS:VEC_120/SCL_212
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = zext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(1) %out
@@ -11717,118 +12983,659 @@ define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(ptr addrspace(1) %out
;
; EG-LABEL: global_sextload_v32i8_to_v32i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 1 @8
-; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1
+; EG-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 104, @16, KC0[], KC1[]
+; EG-NEXT: ALU 104, @121, KC0[], KC1[]
+; EG-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T42.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T40.X, 0
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 1
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; EG-NEXT: ALU clause starting at 12:
-; EG-NEXT: MOV * T11.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 13:
-; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
-; EG-NEXT: LSHR T14.X, PV.W, literal.x,
-; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Y, T12.W, literal.x,
-; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: LSHR T0.W, T12.Y, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Y, literal.x,
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; EG-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: MOV * T0.Y, T16.X,
+; EG-NEXT: MOV * T35.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x,
-; EG-NEXT: LSHR T1.Y, T11.W, literal.x,
-; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.X, literal.x,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x,
-; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T11.Z, literal.x,
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T16.X, PV.W,
+; EG-NEXT: MOV T0.Y, T17.X,
+; EG-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T17.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.Y, PV.W, PS,
+; EG-NEXT: MOV T17.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T12.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x,
-; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x,
-; EG-NEXT: LSHR T0.Z, T12.X, literal.x,
-; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
-; EG-NEXT: 8(1.121039e-44), 32(4.484155e-44)
-; EG-NEXT: LSHR T11.X, PS, literal.x,
-; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: LSHR T0.Z, T12.Z, literal.y,
-; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y,
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T12.X, PS, literal.x,
-; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y,
-; EG-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T12.X, PV.W,
+; EG-NEXT: MOV T0.Y, T13.X,
+; EG-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T36.W, PV.W, PS,
+; EG-NEXT: MOV T13.X, PV.W,
+; EG-NEXT: MOV T0.Y, T8.X,
+; EG-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T8.X, PV.W,
+; EG-NEXT: MOV T0.Y, T9.X,
+; EG-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T9.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: ALU clause starting at 121:
+; EG-NEXT: OR_INT * T37.Y, T1.W, T0.W,
+; EG-NEXT: MOV T9.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T4.X,
+; EG-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T4.X, PV.W,
+; EG-NEXT: MOV T0.Y, T5.X,
+; EG-NEXT: LSHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T37.W, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T37.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV T0.Y, T32.X,
+; EG-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T32.X, PV.W,
+; EG-NEXT: MOV T0.Y, T33.X,
+; EG-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T33.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.X, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.Y, PV.W, PS,
+; EG-NEXT: MOV T33.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T28.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T28.X, PV.W,
+; EG-NEXT: MOV T0.Y, T29.X,
+; EG-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Y, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: ALU clause starting at 226:
+; EG-NEXT: AND_INT T1.W, T0.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, T0.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T38.W, PV.W, PS,
+; EG-NEXT: MOV T29.X, PV.W,
+; EG-NEXT: MOV T0.Y, T24.X,
+; EG-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T24.X, PV.W,
+; EG-NEXT: MOV T0.Y, T25.X,
+; EG-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T25.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ASHR * T0.W, T35.Z, literal.x,
+; EG-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: OR_INT * T35.Y, PV.W, PS,
+; EG-NEXT: MOV T25.X, PV.Y,
+; EG-NEXT: MOV T0.Y, T20.X,
+; EG-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: -65536(nan), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV * T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T20.X, PV.W,
+; EG-NEXT: MOV T0.Y, T21.X,
+; EG-NEXT: LSHR * T0.W, T35.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), -65536(nan)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T21.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T39.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T40.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, T35.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT: 24(3.363116e-44), 48(6.726233e-44)
+; EG-NEXT: LSHR T41.X, PS, literal.x,
+; EG-NEXT: AND_INT T0.Z, T0.Y, literal.y,
+; EG-NEXT: LSHL T0.W, PV.W, literal.z,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.w,
+; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44)
+; EG-NEXT: LSHR T42.X, PS, literal.x,
+; EG-NEXT: OR_INT * T35.W, PV.Z, PV.W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T21.X, PV.W,
+; EG-NEXT: MOV * T36.X, T16.X,
+; EG-NEXT: MOV * T36.Z, T12.X,
+; EG-NEXT: MOV T37.X, T8.X,
+; EG-NEXT: MOV T37.Z, T4.X, BS:VEC_120/SCL_212
+; EG-NEXT: MOV * T38.X, T32.X,
+; EG-NEXT: MOV * T38.Z, T28.X,
+; EG-NEXT: MOV T35.X, T24.X,
+; EG-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
;
; CM-LABEL: global_sextload_v32i8_to_v32i16:
; CM: ; %bb.0:
-; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
-; CM-NEXT: TEX 1 @8
-; CM-NEXT: ALU 40, @13, KC0[CB0:0-32], KC1[]
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T11.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T18.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T14.X
-; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T13.X
+; CM-NEXT: ALU 1, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 104, @16, KC0[], KC1[]
+; CM-NEXT: ALU 104, @121, KC0[], KC1[]
+; CM-NEXT: ALU 95, @226, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T42.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T41.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T40.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T36, T39.X
; CM-NEXT: CF_END
-; CM-NEXT: Fetch clause starting at 8:
-; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1
-; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1
-; CM-NEXT: ALU clause starting at 12:
-; CM-NEXT: MOV * T11.X, KC0[2].Z,
-; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 16, #1
+; CM-NEXT: VTX_READ_128 T35.XYZW, T35.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: MOV * T0.Y, T16.X,
+; CM-NEXT: MOV * T35.X, KC0[2].Z,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: BFE_INT * T0.W, T37.X, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T0.Y, literal.y,
+; CM-NEXT: 65535(9.183409e-41), -65536(nan)
+; CM-NEXT: OR_INT * T0.W, PV.W, PV.Z,
+; CM-NEXT: MOV * T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T16.X, PV.W,
+; CM-NEXT: MOV T0.Y, T17.X,
+; CM-NEXT: LSHR * T0.W, T37.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T17.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T36.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T17.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T12.X,
+; CM-NEXT: BFE_INT * T0.W, T37.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T12.X, PV.W,
+; CM-NEXT: MOV T0.Y, T13.X,
+; CM-NEXT: LSHR * T0.W, T37.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T36.W, PV.Z, PV.W,
+; CM-NEXT: MOV T13.X, PV.W,
+; CM-NEXT: MOV T0.Y, T8.X,
+; CM-NEXT: BFE_INT * T0.W, T37.Z, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T8.X, PV.W,
+; CM-NEXT: MOV T0.Y, T9.X,
+; CM-NEXT: LSHR * T0.W, T37.Z, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T9.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.Z, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: ALU clause starting at 121:
+; CM-NEXT: OR_INT * T37.Y, T0.Z, T0.W,
+; CM-NEXT: MOV T9.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T4.X,
+; CM-NEXT: BFE_INT * T0.W, T37.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T37.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T4.X, PV.W,
+; CM-NEXT: MOV T0.Y, T5.X,
+; CM-NEXT: LSHR * T0.W, T37.W, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T37.W, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T37.W, PV.Z, PV.W,
+; CM-NEXT: MOV T5.X, PV.W,
+; CM-NEXT: MOV T0.Y, T32.X,
+; CM-NEXT: BFE_INT * T0.W, T35.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T32.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T32.X, PV.W,
+; CM-NEXT: MOV T0.Y, T33.X,
+; CM-NEXT: LSHR * T0.W, T35.X, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T33.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T35.X, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T38.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T33.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T28.X,
+; CM-NEXT: BFE_INT * T0.W, T35.Y, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T28.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T28.X, PV.W,
+; CM-NEXT: MOV T0.Y, T29.X,
+; CM-NEXT: LSHR * T0.W, T35.Y, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T29.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T35.Y, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 226:
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, T0.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T38.W, PV.Z, PV.W,
+; CM-NEXT: MOV T29.X, PV.W,
+; CM-NEXT: MOV T0.Y, T24.X,
+; CM-NEXT: BFE_INT * T0.W, T35.Z, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T24.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T24.X, PV.W,
+; CM-NEXT: MOV T0.Y, T25.X,
+; CM-NEXT: LSHR * T0.W, T35.Z, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T25.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: ASHR * T0.W, T35.Z, literal.x,
+; CM-NEXT: 24(3.363116e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T35.Y, PV.Z, PV.W,
+; CM-NEXT: MOV T25.X, PV.Y,
+; CM-NEXT: MOV T0.Y, T20.X,
+; CM-NEXT: BFE_INT * T0.W, T35.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, PV.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T20.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: LSHR * T0.W, T35.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV T20.X, PV.W,
+; CM-NEXT: MOV T0.Y, T21.X,
+; CM-NEXT: LSHR * T0.W, T35.W, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: -65536(nan), 65535(9.183409e-41)
+; CM-NEXT: OR_INT * T0.W, PV.Z, PV.W,
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T0.Y, PV.X,
; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
-; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T13.X, PV.W, literal.x,
-; CM-NEXT: LSHR T0.Y, T11.Y, literal.y,
-; CM-NEXT: LSHR T0.Z, T11.Z, literal.y,
-; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
-; CM-NEXT: LSHR T14.X, PV.W, literal.x,
-; CM-NEXT: LSHR T1.Y, T11.W, literal.y,
-; CM-NEXT: BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212
-; CM-NEXT: LSHR * T0.W, T12.X, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: BFE_INT T15.X, T12.Z, 0.0, literal.x,
-; CM-NEXT: LSHR T2.Y, T12.Y, literal.x,
-; CM-NEXT: BFE_INT T16.Z, T12.Y, 0.0, literal.x,
-; CM-NEXT: LSHR * T1.W, T12.W, literal.x,
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T16.X, T12.X, 0.0, literal.x,
-; CM-NEXT: LSHR T3.Y, T12.Z, literal.x,
-; CM-NEXT: BFE_INT T12.Z, T11.W, 0.0, literal.x,
-; CM-NEXT: BFE_INT * T15.W, PV.W, 0.0, literal.x,
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T12.X, T11.Z, 0.0, literal.x,
-; CM-NEXT: BFE_INT T15.Y, PV.Y, 0.0, literal.x,
-; CM-NEXT: BFE_INT T17.Z, T11.Y, 0.0, literal.x,
-; CM-NEXT: BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212
-; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
-; CM-NEXT: BFE_INT T17.X, T11.X, 0.0, literal.x,
-; CM-NEXT: BFE_INT T16.Y, T0.W, 0.0, literal.x,
-; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y,
-; CM-NEXT: BFE_INT * T12.W, T1.Y, 0.0, literal.x,
-; CM-NEXT: 8(1.121039e-44), 16(2.242078e-44)
-; CM-NEXT: LSHR T18.X, PV.Z, literal.x,
-; CM-NEXT: BFE_INT T12.Y, T0.Z, 0.0, literal.y,
-; CM-NEXT: LSHR T0.Z, T11.X, literal.y,
-; CM-NEXT: BFE_INT * T17.W, T0.Y, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
-; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x,
-; CM-NEXT: BFE_INT * T17.Y, PV.Z, 0.0, literal.y,
-; CM-NEXT: 2(2.802597e-45), 8(1.121039e-44)
+; CM-NEXT: LSHR T39.X, PV.W, literal.x,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44)
+; CM-NEXT: LSHR T40.X, PV.W, literal.x,
+; CM-NEXT: ASHR * T0.W, T35.W, literal.y,
+; CM-NEXT: 2(2.802597e-45), 24(3.363116e-44)
+; CM-NEXT: LSHR T41.X, KC0[2].Y, literal.x,
+; CM-NEXT: AND_INT T0.Y, T0.Y, literal.y,
+; CM-NEXT: LSHL T0.Z, PV.W, literal.z,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT: 2(2.802597e-45), 65535(9.183409e-41)
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T42.X, PV.W, literal.x,
+; CM-NEXT: OR_INT * T35.W, PV.Y, PV.Z,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOV * T21.X, PV.W,
+; CM-NEXT: MOV T36.X, T16.X,
+; CM-NEXT: MOV * T36.Z, T12.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T37.X, T8.X,
+; CM-NEXT: MOV * T37.Z, T4.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T38.X, T32.X,
+; CM-NEXT: MOV * T38.Z, T28.X, BS:VEC_120/SCL_212
+; CM-NEXT: MOV T35.X, T24.X,
+; CM-NEXT: MOV * T35.Z, T20.X, BS:VEC_120/SCL_212
%load = load <32 x i8>, ptr addrspace(1) %in
%ext = sext <32 x i8> %load to <32 x i16>
store <32 x i16> %ext, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index 8dcecfe..a209de7 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -151,19 +151,27 @@ define amdgpu_kernel void @local_load_v3i16(ptr addrspace(3) %out, ptr addrspace
;
; EG-LABEL: local_load_v3i16:
; EG: ; %bb.0: ; %entry
-; EG-NEXT: ALU 11, @2, KC0[CB0:0-32], KC1[]
-; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x,
-; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
-; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
-; EG-NEXT: MOV T0.X, OQAP,
+; EG-NEXT: ALU 19, @2, KC0[CB0:0-32], KC1[]
; EG-NEXT: MOV * T0.W, KC0[2].Z,
; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
; EG-NEXT: MOV T0.Y, OQAP,
-; EG-NEXT: MOV * T0.W, KC0[2].Y,
-; EG-NEXT: LDS_WRITE * T0.W, T0.Y,
+; EG-NEXT: ADD_INT * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T0.W
+; EG-NEXT: MOV * T0.Z, OQAP,
+; EG-NEXT: LSHL T0.Z, PV.Z, literal.x,
+; EG-NEXT: AND_INT T0.W, T0.Y, literal.y,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Z, literal.z,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
+; EG-NEXT: LDS_USHORT_READ_RET * OQAP, T1.W
+; EG-NEXT: MOV T0.Y, OQAP,
+; EG-NEXT: OR_INT T0.W, T0.Z, T0.W,
+; EG-NEXT: MOV * T1.W, KC0[2].Y,
+; EG-NEXT: LDS_WRITE * T1.W, T0.W,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
-; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.X,
+; EG-NEXT: LDS_SHORT_WRITE * T0.W, T0.Y,
; EG-NEXT: RETURN
entry:
%ld = load <3 x i16>, ptr addrspace(3) %in
diff --git a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
index 22ebb55..702a69f 100644
--- a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll
@@ -400,9 +400,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-NEXT: s_wait_alu 0xf1ff
@@ -438,9 +438,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
-; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
@@ -531,9 +531,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-NEXT: s_wait_alu 0xf1ff
@@ -569,9 +569,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d,
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0
-; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0
+; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6
; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1
+; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0
; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2
; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff
diff --git a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
index 05a0b1a..836e88c 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
@@ -35,13 +35,6 @@ define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) {
}
define amdgpu_ps float @mad_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
-; GCN-LABEL: mad_i32_sss:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_mul_i32 s0, s0, s1
-; GCN-NEXT: s_add_i32 s0, s0, s2
-; GCN-NEXT: v_mov_b32_e32 v0, s0
-; GCN-NEXT: ; return to shader part epilog
-;
; GFX9-LABEL: mad_i32_sss:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mul_i32 s0, s0, s1
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
index be02045..4c0ab91 100644
--- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
@@ -6982,7 +6982,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16
; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800
; CHECK-NEXT: s_cbranch_scc1 .LBB6_2
-; CHECK-NEXT: .LBB6_3: ; %Flow9
+; CHECK-NEXT: .LBB6_3: ; %Flow7
; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6
; CHECK-NEXT: s_cbranch_execz .LBB6_6
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
@@ -7048,7 +7048,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16
; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7]
; CHECK-NEXT: s_cbranch_scc0 .LBB6_5
-; CHECK-NEXT: .LBB6_6: ; %Flow10
+; CHECK-NEXT: .LBB6_6: ; %Flow8
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_setpc_b64 s[30:31]
;
@@ -7689,7 +7689,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3
; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1
; ALIGNED-NEXT: s_cbranch_scc1 .LBB6_2
-; ALIGNED-NEXT: .LBB6_3: ; %Flow9
+; ALIGNED-NEXT: .LBB6_3: ; %Flow7
; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6
; ALIGNED-NEXT: s_cbranch_execz .LBB6_6
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
@@ -8316,7 +8316,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3
; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1
; ALIGNED-NEXT: s_cbranch_scc0 .LBB6_5
-; ALIGNED-NEXT: .LBB6_6: ; %Flow10
+; ALIGNED-NEXT: .LBB6_6: ; %Flow8
; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8
; ALIGNED-NEXT: s_clause 0x7
; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32
@@ -8369,7 +8369,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032
; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3
; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1
-; UNROLL3-NEXT: .LBB6_4: ; %Flow7
+; UNROLL3-NEXT: .LBB6_4: ; %Flow5
; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6
; UNROLL3-NEXT: s_cbranch_execz .LBB6_7
; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual
@@ -8403,7 +8403,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1
; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32
; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7]
; UNROLL3-NEXT: s_cbranch_scc0 .LBB6_6
-; UNROLL3-NEXT: .LBB6_7: ; %Flow8
+; UNROLL3-NEXT: .LBB6_7: ; %Flow6
; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8
; UNROLL3-NEXT: s_setpc_b64 s[30:31]
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
index 272daa9..dd5c247 100644
--- a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll
@@ -460,10 +460,10 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1]
; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6
; CHECK-NEXT: s_cbranch_execnz .LBB3_3
-; CHECK-NEXT: ; %bb.1: ; %Flow34
+; CHECK-NEXT: ; %bb.1: ; %Flow36
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
; CHECK-NEXT: s_cbranch_execnz .LBB3_10
-; CHECK-NEXT: .LBB3_2: ; %Flow35
+; CHECK-NEXT: .LBB3_2: ; %Flow37
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -494,7 +494,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB3_5
-; CHECK-NEXT: .LBB3_6: ; %Flow29
+; CHECK-NEXT: .LBB3_6: ; %Flow31
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_and_saveexec_b32 s8, s4
; CHECK-NEXT: s_cbranch_execz .LBB3_9
@@ -520,7 +520,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB3_8
-; CHECK-NEXT: .LBB3_9: ; %Flow27
+; CHECK-NEXT: .LBB3_9: ; %Flow29
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
@@ -556,7 +556,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_cbranch_execnz .LBB3_12
-; CHECK-NEXT: .LBB3_13: ; %Flow33
+; CHECK-NEXT: .LBB3_13: ; %Flow35
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB3_16
@@ -584,7 +584,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_cbranch_execnz .LBB3_15
-; CHECK-NEXT: .LBB3_16: ; %Flow31
+; CHECK-NEXT: .LBB3_16: ; %Flow33
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
@@ -907,10 +907,10 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1]
; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6
; CHECK-NEXT: s_cbranch_execnz .LBB6_3
-; CHECK-NEXT: ; %bb.1: ; %Flow41
+; CHECK-NEXT: ; %bb.1: ; %Flow39
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
; CHECK-NEXT: s_cbranch_execnz .LBB6_10
-; CHECK-NEXT: .LBB6_2: ; %Flow42
+; CHECK-NEXT: .LBB6_2: ; %Flow40
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB6_3: ; %memmove_copy_forward
@@ -940,7 +940,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB6_5
-; CHECK-NEXT: .LBB6_6: ; %Flow36
+; CHECK-NEXT: .LBB6_6: ; %Flow34
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_and_saveexec_b32 s8, s4
; CHECK-NEXT: s_cbranch_execz .LBB6_9
@@ -966,11 +966,11 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9
; CHECK-NEXT: s_cbranch_execnz .LBB6_8
-; CHECK-NEXT: .LBB6_9: ; %Flow34
+; CHECK-NEXT: .LBB6_9: ; %Flow32
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
-; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
@@ -1002,15 +1002,15 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: s_cbranch_execnz .LBB6_12
-; CHECK-NEXT: .LBB6_13: ; %Flow40
+; CHECK-NEXT: .LBB6_13: ; %Flow38
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB6_16
; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
-; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
+; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB6_15: ; %memmove_bwd_main_loop
@@ -1030,7 +1030,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align
; CHECK-NEXT: global_store_dwordx4 v[12:13], v[8:11], off
; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; CHECK-NEXT: s_cbranch_execnz .LBB6_15
-; CHECK-NEXT: .LBB6_16: ; %Flow38
+; CHECK-NEXT: .LBB6_16: ; %Flow36
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6
; CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -1181,8 +1181,8 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: .LBB8_9: ; %Flow31
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
-; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
+; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9
; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7
@@ -1219,10 +1219,10 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align
; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo
; CHECK-NEXT: s_cbranch_execz .LBB8_16
; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader
-; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
-; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16
; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo
+; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16
+; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo
; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: .p2align 6
; CHECK-NEXT: .LBB8_15: ; %memmove_bwd_main_loop
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 721f974..311527d 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -991,30 +991,81 @@ define amdgpu_kernel void @s_test_imin_sle_v2i16(ptr addrspace(1) %out, <2 x i16
define amdgpu_kernel void @s_test_imin_sle_v4i16(ptr addrspace(1) %out, <4 x i16> %a, <4 x i16> %b) #0 {
; EG-LABEL: s_test_imin_sle_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @14, KC0[], KC1[]
-; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 9, @15, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT: ALU 1, @28, KC0[], KC1[]
+; EG-NEXT: TEX 1 @12
+; EG-NEXT: ALU 9, @30, KC0[], KC1[]
+; EG-NEXT: TEX 1 @16
+; EG-NEXT: ALU 10, @40, KC0[], KC1[]
+; EG-NEXT: TEX 1 @20
+; EG-NEXT: ALU 10, @51, KC0[], KC1[]
+; EG-NEXT: TEX 1 @24
+; EG-NEXT: ALU 11, @62, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XY, T5.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 52, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 44, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 54, #3
-; EG-NEXT: ALU clause starting at 14:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: Fetch clause starting at 12:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 50, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 58, #3
+; EG-NEXT: Fetch clause starting at 16:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 48, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 56, #3
+; EG-NEXT: Fetch clause starting at 20:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 46, #3
+; EG-NEXT: VTX_READ_16 T7.X, T5.X, 54, #3
+; EG-NEXT: Fetch clause starting at 24:
+; EG-NEXT: VTX_READ_16 T6.X, T5.X, 44, #3
+; EG-NEXT: VTX_READ_16 T5.X, T5.X, 52, #3
+; EG-NEXT: ALU clause starting at 28:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T5.X, 0.0,
+; EG-NEXT: ALU clause starting at 30:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT T0.Y, PV.Z, PV.W,
-; EG-NEXT: BFE_INT T0.Z, T3.X, 0.0, literal.x,
-; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 40:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: MIN_INT T0.X, PV.Z, PV.W,
-; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 51:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: MIN_INT T0.W, PV.Z, PV.W,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 62:
+; EG-NEXT: BFE_INT T0.Z, T6.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T5.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: MIN_INT * T0.W, PV.Z, PV.W,
+; EG-NEXT: LSHR T5.X, KC0[2].Y, literal.x,
+; EG-NEXT: AND_INT T1.W, T0.Y, literal.y,
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.z,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T6.X, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.X,
+; EG-NEXT: MOV * T6.Y, T3.X,
;
; CI-LABEL: s_test_imin_sle_v4i16:
; CI: ; %bb.0:
@@ -2154,40 +2205,49 @@ define amdgpu_kernel void @v_test_umin_ule_v3i32(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @v_test_umin_ule_v3i16(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
; EG-LABEL: v_test_umin_ule_v3i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 3, @14, KC0[CB0:0-32], KC1[]
-; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 17, @18, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T2.X, 0
-; EG-NEXT: MEM_RAT MSKOR T4.XW, T0.X
+; EG-NEXT: ALU 3, @20, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @8
+; EG-NEXT: ALU 11, @24, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 3 @12
+; EG-NEXT: ALU 8, @36, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T8.X, 0
+; EG-NEXT: MEM_RAT MSKOR T7.XW, T0.X
; EG-NEXT: CF_END
-; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_16 T2.X, T1.X, 0, #1
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T1.X, T1.X, 4, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
-; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_16 T7.X, T6.X, 4, #1
+; EG-NEXT: VTX_READ_16 T8.X, T0.X, 4, #1
+; EG-NEXT: Fetch clause starting at 12:
+; EG-NEXT: VTX_READ_16 T8.X, T6.X, 0, #1
+; EG-NEXT: VTX_READ_16 T9.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T6.X, T6.X, 2, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT: ALU clause starting at 20:
; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.W,
-; EG-NEXT: ALU clause starting at 18:
+; EG-NEXT: ADD_INT * T6.X, KC0[2].W, PV.W,
+; EG-NEXT: ALU clause starting at 24:
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT: ADD_INT * T1.W, PV.W, literal.x,
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT * T2.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T2.W, PV.W, literal.x,
-; EG-NEXT: MIN_UINT * T3.W, T0.X, T1.X,
+; EG-NEXT: MIN_UINT * T3.W, T8.X, T7.X,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
-; EG-NEXT: LSHL T4.X, PS, PV.W,
-; EG-NEXT: LSHL * T4.W, literal.x, PV.W,
+; EG-NEXT: LSHL T7.X, PS, PV.W,
+; EG-NEXT: LSHL * T7.W, literal.x, PV.W,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MOV T4.Y, 0.0,
-; EG-NEXT: MOV * T4.Z, 0.0,
+; EG-NEXT: MOV * T7.Y, 0.0,
+; EG-NEXT: ALU clause starting at 36:
+; EG-NEXT: MOV T7.Z, 0.0,
+; EG-NEXT: MIN_UINT * T2.W, T0.X, T6.X,
; EG-NEXT: LSHR T0.X, T1.W, literal.x,
-; EG-NEXT: MIN_UINT * T1.X, T3.X, T2.X,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; EG-NEXT: LSHR * T2.X, T0.W, literal.x,
+; EG-NEXT: LSHL T1.W, PV.W, literal.y,
+; EG-NEXT: MIN_UINT * T2.W, T9.X, T8.X,
+; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
+; EG-NEXT: OR_INT T6.X, PV.W, PS,
+; EG-NEXT: LSHR * T8.X, T0.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CI-LABEL: v_test_umin_ule_v3i16:
@@ -3483,46 +3543,142 @@ define amdgpu_kernel void @s_test_umin_ult_v8i32(ptr addrspace(1) %out, <8 x i32
define amdgpu_kernel void @s_test_umin_ult_v8i16(ptr addrspace(1) %out, <8 x i16> %a, <8 x i16> %b) #0 {
; EG-LABEL: s_test_umin_ult_v8i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @24, KC0[], KC1[]
-; EG-NEXT: TEX 2 @8
-; EG-NEXT: ALU 2, @25, KC0[], KC1[]
-; EG-NEXT: TEX 4 @14
-; EG-NEXT: ALU 14, @28, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; EG-NEXT: ALU 1, @52, KC0[], KC1[]
+; EG-NEXT: TEX 1 @20
+; EG-NEXT: ALU 9, @54, KC0[], KC1[]
+; EG-NEXT: TEX 1 @24
+; EG-NEXT: ALU 8, @64, KC0[], KC1[]
+; EG-NEXT: TEX 1 @28
+; EG-NEXT: ALU 10, @73, KC0[], KC1[]
+; EG-NEXT: TEX 1 @32
+; EG-NEXT: ALU 8, @84, KC0[], KC1[]
+; EG-NEXT: TEX 1 @36
+; EG-NEXT: ALU 10, @93, KC0[], KC1[]
+; EG-NEXT: TEX 1 @40
+; EG-NEXT: ALU 8, @104, KC0[], KC1[]
+; EG-NEXT: TEX 1 @44
+; EG-NEXT: ALU 10, @113, KC0[], KC1[]
+; EG-NEXT: TEX 1 @48
+; EG-NEXT: ALU 10, @124, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
-; EG-NEXT: Fetch clause starting at 8:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 62, #3
-; EG-NEXT: VTX_READ_16 T2.X, T0.X, 60, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 78, #3
-; EG-NEXT: Fetch clause starting at 14:
-; EG-NEXT: VTX_READ_16 T1.X, T0.X, 68, #3
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 52, #3
-; EG-NEXT: VTX_READ_16 T4.X, T0.X, 70, #3
-; EG-NEXT: VTX_READ_16 T5.X, T0.X, 54, #3
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 76, #3
-; EG-NEXT: ALU clause starting at 24:
-; EG-NEXT: MOV * T0.X, 0.0,
-; EG-NEXT: ALU clause starting at 25:
-; EG-NEXT: AND_INT T0.W, T1.X, literal.x,
-; EG-NEXT: AND_INT * T1.W, T3.X, literal.x,
+; EG-NEXT: Fetch clause starting at 20:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 66, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 82, #3
+; EG-NEXT: Fetch clause starting at 24:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 64, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 80, #3
+; EG-NEXT: Fetch clause starting at 28:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 62, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 78, #3
+; EG-NEXT: Fetch clause starting at 32:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 60, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 76, #3
+; EG-NEXT: Fetch clause starting at 36:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 58, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 74, #3
+; EG-NEXT: Fetch clause starting at 40:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 56, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 72, #3
+; EG-NEXT: Fetch clause starting at 44:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 54, #3
+; EG-NEXT: VTX_READ_16 T9.X, T7.X, 70, #3
+; EG-NEXT: Fetch clause starting at 48:
+; EG-NEXT: VTX_READ_16 T8.X, T7.X, 52, #3
+; EG-NEXT: VTX_READ_16 T7.X, T7.X, 68, #3
+; EG-NEXT: ALU clause starting at 52:
+; EG-NEXT: MOV * T0.Y, T3.X,
+; EG-NEXT: MOV * T7.X, 0.0,
+; EG-NEXT: ALU clause starting at 54:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 28:
-; EG-NEXT: AND_INT T0.Z, T2.X, literal.x,
-; EG-NEXT: AND_INT T2.W, T0.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT: MIN_UINT * T0.W, T0.W, T1.W,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: LSHL T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 64:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.Z, PV.Z, PV.W,
-; EG-NEXT: AND_INT T1.W, T5.X, literal.x,
-; EG-NEXT: AND_INT * T2.W, T4.X, literal.x,
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T3.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T2.X,
+; EG-NEXT: ALU clause starting at 73:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.Y, PV.W, PS,
-; EG-NEXT: AND_INT T1.W, T3.X, literal.x,
-; EG-NEXT: AND_INT * T2.W, T1.X, literal.x,
+; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: MIN_UINT T0.X, PV.W, PS,
-; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T2.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 84:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T7.Z, PV.W, PS,
+; EG-NEXT: MOV T2.X, PV.Z,
+; EG-NEXT: MOV * T0.Y, T5.X,
+; EG-NEXT: ALU clause starting at 93:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 104:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.x,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, PS,
+; EG-NEXT: MOV T5.X, PV.W,
+; EG-NEXT: MOV * T0.Y, T4.X,
+; EG-NEXT: ALU clause starting at 113:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MIN_UINT T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T4.X, PV.W,
+; EG-NEXT: MOV * T0.Y, PV.X,
+; EG-NEXT: ALU clause starting at 124:
+; EG-NEXT: AND_INT T0.W, T8.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, T7.X, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
+; EG-NEXT: AND_INT T2.W, T0.Y, literal.y,
+; EG-NEXT: MIN_UINT * T0.W, PV.W, PS,
+; EG-NEXT: 2(2.802597e-45), -65536(nan)
+; EG-NEXT: OR_INT * T7.X, PV.W, PS,
+; EG-NEXT: MOV T4.X, PV.X,
+; EG-NEXT: MOV * T7.W, T3.X,
+; EG-NEXT: MOV * T7.Y, T5.X,
;
; CI-LABEL: s_test_umin_ult_v8i16:
; CI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll b/llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll
new file mode 100644
index 0000000..c4a48a46
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll
@@ -0,0 +1,34 @@
+; REQUIRES: asserts
+
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -debug-only=gcn-subtarget < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s 2>&1 | FileCheck -check-prefixes=WARNING %s
+
+; CHECK: Post-MI-sched direction (postra-sched-topdown): topdown
+define float @postra-sched-topdown(float %input) nounwind #0 {
+ %x = fadd float %input, 1.000000e+00
+ ret float %x
+}
+
+; CHECK: Post-MI-sched direction (postra-sched-bottomup): bottomup
+define float @postra-sched-bottomup(float %input) nounwind #1 {
+ %x = fsub float %input, 1.000000e+00
+ ret float %x
+}
+
+; CHECK: Post-MI-sched direction (postra-sched-bidirectional): bidirectional
+define float @postra-sched-bidirectional(float %input) nounwind #2 {
+ %x = fadd float %input, 1.000000e+00
+ ret float %x
+}
+
+; CHECK: Post-MI-sched direction (postra-sched-warning): topdown
+; WARNING: invalid value for postRA direction attribute
+define float @postra-sched-warning(float %input) nounwind #3 {
+ %x = fsub float %input, 1.000000e+00
+ ret float %x
+}
+
+attributes #0 = {"amdgpu-post-ra-direction"="topdown"}
+attributes #1 = {"amdgpu-post-ra-direction"="bottomup"}
+attributes #2 = {"amdgpu-post-ra-direction"="bidirectional"}
+attributes #3 = {"amdgpu-post-ra-direction"="warning"}
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 7aa7342..28330bf 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -681,30 +681,63 @@ define amdgpu_kernel void @shl_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in
;
; EG-LABEL: shl_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 3, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T8.X, 1
+; EG-NEXT: ALU 42, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T8.XYZW, T0.X, 0, #1
+; EG-NEXT: VTX_READ_128 T10.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT: MOV T0.Y, T6.X,
+; EG-NEXT: LSHL * T0.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT: ALU clause starting at 11:
-; EG-NEXT: LSHR T1.W, T8.Z, literal.x,
-; EG-NEXT: LSHR * T2.W, T8.X, literal.x,
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: AND_INT * T1.W, T10.Z, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, T10.X, PV.W,
+; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T2.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T1.W, PS, PV.W,
+; EG-NEXT: MOV * T6.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR T1.W, T10.Z, literal.x,
+; EG-NEXT: LSHR * T2.W, T10.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHL T0.Y, PS, PV.W,
-; EG-NEXT: AND_INT T1.W, T8.Z, literal.x,
-; EG-NEXT: AND_INT * T2.W, T8.X, literal.x,
+; EG-NEXT: LSHL T1.W, PS, PV.W,
+; EG-NEXT: AND_INT * T2.W, PV.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL T0.X, PS, PV.W,
+; EG-NEXT: LSHL * T1.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV T6.X, PV.W,
+; EG-NEXT: MOV * T0.X, T7.X,
+; EG-NEXT: AND_INT * T1.W, T10.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL T1.W, T10.Y, PV.W,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T1.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
+; EG-NEXT: MOV * T7.X, PV.W,
+; EG-NEXT: MOV T0.X, PV.X,
+; EG-NEXT: LSHR T1.W, T10.W, literal.x,
+; EG-NEXT: LSHR * T2.W, T10.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: LSHL * T1.W, PS, PV.W,
+; EG-NEXT: AND_INT T0.Z, T0.X, literal.x,
+; EG-NEXT: LSHL T1.W, PV.W, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W,
-; EG-NEXT: LSHR * T8.X, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
+; EG-NEXT: LSHR T0.X, PS, literal.x,
+; EG-NEXT: OR_INT * T10.Y, PV.Z, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T7.X, PV.Y,
+; EG-NEXT: MOV * T10.X, T6.X,
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep = getelementptr inbounds <4 x i16>, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr inbounds <4 x i16>, ptr addrspace(1) %out, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 5d169c1..80c0d0f 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -320,28 +320,67 @@ define amdgpu_kernel void @ashr_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %i
;
; EG-LABEL: ashr_v4i16:
; EG: ; %bb.0:
-; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 1, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[]
-; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XY, T8.X, 1
+; EG-NEXT: ALU 48, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T9.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
-; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1
+; EG-NEXT: VTX_READ_128 T9.XYZW, T9.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: MOV * T7.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 9:
-; EG-NEXT: LSHR T0.Z, T7.X, literal.x,
-; EG-NEXT: BFE_INT T0.W, T7.X, 0.0, literal.x,
-; EG-NEXT: AND_INT * T1.W, T7.Z, literal.y,
+; EG-NEXT: MOV * T0.Y, T6.X,
+; EG-NEXT: MOV * T9.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: BFE_INT T0.W, T9.X, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.Z, literal.y,
; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
-; EG-NEXT: ASHR T7.X, PV.W, PS,
-; EG-NEXT: BFE_INT T0.W, PV.Z, 0.0, literal.x,
-; EG-NEXT: LSHR * T1.W, T7.Z, literal.x,
+; EG-NEXT: ASHR * T0.W, PV.W, PS,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.y,
+; EG-NEXT: 65535(9.183409e-41), -65536(nan)
+; EG-NEXT: OR_INT * T0.W, PS, PV.W,
+; EG-NEXT: MOV * T6.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T9.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T9.Z, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV T6.X, PV.W,
+; EG-NEXT: MOV T0.Y, T7.X,
+; EG-NEXT: BFE_INT T0.W, T9.Y, 0.0, literal.x,
+; EG-NEXT: AND_INT * T1.W, T9.W, literal.y,
+; EG-NEXT: 16(2.242078e-44), 65535(9.183409e-41)
+; EG-NEXT: ASHR T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
+; EG-NEXT: -65536(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, T1.W, PV.W,
+; EG-NEXT: MOV * T7.X, PV.W,
+; EG-NEXT: MOV T0.Y, PV.X,
+; EG-NEXT: LSHR * T0.W, T9.Y, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.W, T9.W, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: ASHR T0.W, PV.W, PS,
+; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x,
-; EG-NEXT: ASHR * T7.Y, PV.W, PS,
+; EG-NEXT: LSHR T9.X, KC0[2].Y, literal.x,
+; EG-NEXT: OR_INT * T10.Y, T1.W, PV.W,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV T7.X, PV.Y,
+; EG-NEXT: MOV * T10.X, T6.X,
%b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i16 1
%a = load <4 x i16>, ptr addrspace(1) %in
%b = load <4 x i16>, ptr addrspace(1) %b_ptr
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem24.ll b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
index 5477d62..1e5ec59 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem24.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem24.ll
@@ -1,18 +1,103 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s
-; FUNC-LABEL: {{^}}udiv24_i8:
-; SI: v_cvt_f32_ubyte
-; SI-DAG: v_cvt_f32_ubyte
-; SI-DAG: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: udiv24_i8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv24_i8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1
+; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv24_i8:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.X, T0.X,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: ADD_INT * T1.W, PS, PV.W,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -21,17 +106,101 @@ define amdgpu_kernel void @udiv24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in
ret void
}
-; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in_out:
-; SI: v_cvt_f32_ubyte
-; SI-DAG: v_cvt_f32_ubyte
-; SI-DAG: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+; SI-LABEL: udiv24_i8_denorm_flush_in_out:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv24_i8_denorm_flush_in_out:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1
+; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv24_i8_denorm_flush_in_out:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.X, T0.X,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: ADD_INT * T1.W, PS, PV.W,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -40,17 +209,101 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out,
ret void
}
-; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in:
-; SI: v_cvt_f32_ubyte
-; SI-DAG: v_cvt_f32_ubyte
-; SI-DAG: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; SI-LABEL: udiv24_i8_denorm_flush_in:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv24_i8_denorm_flush_in:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1
+; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv24_i8_denorm_flush_in:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.X, T0.X,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: ADD_INT * T1.W, PS, PV.W,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -59,17 +312,101 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr
ret void
}
-; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_out:
-; SI: v_cvt_f32_ubyte
-; SI-DAG: v_cvt_f32_ubyte
-; SI-DAG: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i8_denorm_flush_out(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
+; SI-LABEL: udiv24_i8_denorm_flush_out:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv24_i8_denorm_flush_out:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1
+; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv24_i8_denorm_flush_out:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.X, T0.X,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: ADD_INT * T1.W, PS, PV.W,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -78,17 +415,101 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_out(ptr addrspace(1) %out, ptr
ret void
}
-; FUNC-LABEL: {{^}}udiv24_i16:
-; SI: v_cvt_f32_u32
-; SI: v_cvt_f32_u32
-; SI: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv24_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: udiv24_i16:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_u32_e32 v1, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv24_i16:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
+; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_u32_e32 v1, v1
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv24_i16:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.X, T0.X,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: ADD_INT * T1.W, PS, PV.W,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i16, ptr addrspace(1) %in, i16 1
%num = load i16, ptr addrspace(1) %in, align 2
%den = load i16, ptr addrspace(1) %den_ptr, align 2
@@ -97,17 +518,85 @@ define amdgpu_kernel void @udiv24_i16(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}udiv23_i32:
-; SI: v_cvt_f32_u32
-; SI-DAG: v_cvt_f32_u32
-; SI-DAG: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @udiv23_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: udiv23_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0x7fffff
+; SI-NEXT: s_and_b32 s5, s5, 0x7fffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv23_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s3, s3, 0x7fffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s3
+; VI-NEXT: s_and_b32 s2, s2, 0x7fffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv23_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: UINT_TO_FLT * T0.Y, PV.W,
+; EG-NEXT: AND_INT T0.W, T0.X, literal.x,
+; EG-NEXT: RECIP_IEEE * T0.X, PS,
+; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: UINT_TO_FLT * T0.Z, PV.W,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: AND_INT T0.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8388607(1.175494e-38), 2(2.802597e-45)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -120,11 +609,88 @@ define amdgpu_kernel void @udiv23_i32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}udiv24_i32:
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @udiv24_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: udiv24_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0xffffff
+; SI-NEXT: s_and_b32 s5, s5, 0xffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv24_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s3, s3, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s3
+; VI-NEXT: s_and_b32 s2, s2, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv24_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: ADD_INT T0.Z, T0.X, 1,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS,
+; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z,
+; EG-NEXT: ADD_INT T3.W, PS, 1,
+; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -137,11 +703,88 @@ define amdgpu_kernel void @udiv24_i32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32:
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_udiv24_u23_u24_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: no_udiv24_u23_u24_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0x7fffff
+; SI-NEXT: s_and_b32 s5, s5, 0xffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: no_udiv24_u23_u24_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s3, s3, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s3
+; VI-NEXT: s_and_b32 s2, s2, 0x7fffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: no_udiv24_u23_u24_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: ADD_INT T0.Z, T0.X, 1,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS,
+; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z,
+; EG-NEXT: ADD_INT T3.W, PS, 1,
+; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -154,11 +797,88 @@ define amdgpu_kernel void @no_udiv24_u23_u24_i32(ptr addrspace(1) %out, ptr addr
ret void
}
-; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32:
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @no_udiv24_u24_u23_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: no_udiv24_u24_u23_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0xffffff
+; SI-NEXT: s_and_b32 s5, s5, 0x7fffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: no_udiv24_u24_u23_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s3, s3, 0x7fffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s3
+; VI-NEXT: s_and_b32 s2, s2, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: no_udiv24_u24_u23_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: ADD_INT T0.Z, T0.X, 1,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS,
+; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z,
+; EG-NEXT: ADD_INT T3.W, PS, 1,
+; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -171,14 +891,113 @@ define amdgpu_kernel void @no_udiv24_u24_u23_i32(ptr addrspace(1) %out, ptr addr
ret void
}
-; FUNC-LABEL: {{^}}udiv25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @udiv25_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: udiv25_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0x1ffffff
+; SI-NEXT: s_and_b32 s5, s5, 0x1ffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s5
+; SI-NEXT: s_sub_i32 s6, 0, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; SI-NEXT: v_mul_lo_u32 v1, s6, v0
+; SI-NEXT: v_mul_hi_u32 v1, v0, v1
+; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT: v_mul_hi_u32 v0, s4, v0
+; SI-NEXT: v_readfirstlane_b32 s6, v0
+; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0
+; SI-NEXT: s_mul_i32 s6, s6, s5
+; SI-NEXT: s_sub_i32 s4, s4, s6
+; SI-NEXT: s_sub_i32 s6, s4, s5
+; SI-NEXT: s_cmp_ge_u32 s4, s5
+; SI-NEXT: s_cselect_b64 vcc, -1, 0
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: s_cselect_b32 s4, s6, s4
+; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0
+; SI-NEXT: s_cmp_ge_u32 s4, s5
+; SI-NEXT: s_cselect_b64 vcc, -1, 0
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: udiv25_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; VI-NEXT: s_sub_i32 s3, 0, s4
+; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; VI-NEXT: v_mul_lo_u32 v1, s3, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_mul_hi_u32 v1, v0, v1
+; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: v_mul_hi_u32 v0, s5, v0
+; VI-NEXT: v_readfirstlane_b32 s6, v0
+; VI-NEXT: s_mul_i32 s6, s6, s4
+; VI-NEXT: s_sub_i32 s5, s5, s6
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b64 vcc, -1, 0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: s_cselect_b32 s5, s6, s5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b64 vcc, -1, 0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: udiv25_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: ADD_INT T0.Z, T0.X, 1,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS,
+; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z,
+; EG-NEXT: ADD_INT T3.W, PS, 1,
+; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -191,14 +1010,113 @@ define amdgpu_kernel void @udiv25_i32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_udiv24_i32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: test_no_udiv24_i32_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0xffffff
+; SI-NEXT: s_and_b32 s5, s5, 0x1ffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s5
+; SI-NEXT: s_sub_i32 s6, 0, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; SI-NEXT: v_mul_lo_u32 v1, s6, v0
+; SI-NEXT: v_mul_hi_u32 v1, v0, v1
+; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT: v_mul_hi_u32 v0, s4, v0
+; SI-NEXT: v_readfirstlane_b32 s6, v0
+; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0
+; SI-NEXT: s_mul_i32 s6, s6, s5
+; SI-NEXT: s_sub_i32 s4, s4, s6
+; SI-NEXT: s_sub_i32 s6, s4, s5
+; SI-NEXT: s_cmp_ge_u32 s4, s5
+; SI-NEXT: s_cselect_b64 vcc, -1, 0
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: s_cselect_b32 s4, s6, s4
+; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0
+; SI-NEXT: s_cmp_ge_u32 s4, s5
+; SI-NEXT: s_cselect_b64 vcc, -1, 0
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: test_no_udiv24_i32_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; VI-NEXT: s_sub_i32 s3, 0, s4
+; VI-NEXT: s_and_b32 s5, s2, 0xffffff
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; VI-NEXT: v_mul_lo_u32 v1, s3, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_mul_hi_u32 v1, v0, v1
+; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: v_mul_hi_u32 v0, s5, v0
+; VI-NEXT: v_readfirstlane_b32 s6, v0
+; VI-NEXT: s_mul_i32 s6, s6, s4
+; VI-NEXT: s_sub_i32 s5, s5, s6
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b64 vcc, -1, 0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: s_cselect_b32 s5, s6, s5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b64 vcc, -1, 0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: test_no_udiv24_i32_1:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: ADD_INT T0.Z, T0.X, 1,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS,
+; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z,
+; EG-NEXT: ADD_INT T3.W, PS, 1,
+; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -211,14 +1129,113 @@ define amdgpu_kernel void @test_no_udiv24_i32_1(ptr addrspace(1) %out, ptr addrs
ret void
}
-; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_udiv24_i32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: test_no_udiv24_i32_2:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0x1ffffff
+; SI-NEXT: s_and_b32 s5, s5, 0xffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s5
+; SI-NEXT: s_sub_i32 s6, 0, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; SI-NEXT: v_mul_lo_u32 v1, s6, v0
+; SI-NEXT: v_mul_hi_u32 v1, v0, v1
+; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT: v_mul_hi_u32 v0, s4, v0
+; SI-NEXT: v_readfirstlane_b32 s6, v0
+; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0
+; SI-NEXT: s_mul_i32 s6, s6, s5
+; SI-NEXT: s_sub_i32 s4, s4, s6
+; SI-NEXT: s_sub_i32 s6, s4, s5
+; SI-NEXT: s_cmp_ge_u32 s4, s5
+; SI-NEXT: s_cselect_b64 vcc, -1, 0
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: s_cselect_b32 s4, s6, s4
+; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0
+; SI-NEXT: s_cmp_ge_u32 s4, s5
+; SI-NEXT: s_cselect_b64 vcc, -1, 0
+; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: test_no_udiv24_i32_2:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s3, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; VI-NEXT: s_sub_i32 s3, 0, s4
+; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; VI-NEXT: v_mul_lo_u32 v1, s3, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_mul_hi_u32 v1, v0, v1
+; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: v_mul_hi_u32 v0, s5, v0
+; VI-NEXT: v_readfirstlane_b32 s6, v0
+; VI-NEXT: s_mul_i32 s6, s6, s4
+; VI-NEXT: s_sub_i32 s5, s5, s6
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b64 vcc, -1, 0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: s_cselect_b32 s5, s6, s5
+; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b64 vcc, -1, 0
+; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: test_no_udiv24_i32_2:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: ADD_INT T0.Z, T0.X, 1,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS,
+; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z,
+; EG-NEXT: ADD_INT T3.W, PS, 1,
+; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -231,17 +1248,107 @@ define amdgpu_kernel void @test_no_udiv24_i32_2(ptr addrspace(1) %out, ptr addrs
ret void
}
-; FUNC-LABEL: {{^}}urem24_i8:
-; SI: v_cvt_f32_ubyte
-; SI-DAG: v_cvt_f32_ubyte
-; SI-DAG: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @urem24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: urem24_i8:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_ubyte0_e32 v3, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v4, v3
+; SI-NEXT: v_mul_f32_e32 v4, v2, v4
+; SI-NEXT: v_trunc_f32_e32 v4, v4
+; SI-NEXT: v_fma_f32 v2, -v4, v3, v2
+; SI-NEXT: v_cvt_u32_f32_e32 v4, v4
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, v3
+; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; SI-NEXT: v_mul_lo_u32 v1, v2, v1
+; SI-NEXT: v_subrev_i32_e32 v0, vcc, v1, v0
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: urem24_i8:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1
+; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
+; VI-NEXT: v_rcp_iflag_f32_e32 v3, v2
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v4, v1
+; VI-NEXT: v_mul_f32_e32 v3, v4, v3
+; VI-NEXT: v_trunc_f32_e32 v3, v3
+; VI-NEXT: v_cvt_u32_f32_e32 v5, v3
+; VI-NEXT: v_mad_f32 v3, -v3, v2, v4
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, v2
+; VI-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc
+; VI-NEXT: v_mul_lo_u32 v0, v2, v0
+; VI-NEXT: v_subrev_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: urem24_i8:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 25, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.W, T0.X,
+; EG-NEXT: MUL_IEEE * T1.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T1.W, PV.W,
+; EG-NEXT: MULADD_IEEE T0.W, -PV.W, T0.Y, T0.W,
+; EG-NEXT: TRUNC * T1.W, PV.W,
+; EG-NEXT: SETGE * T0.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.Y, T1.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.X,
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: SUB_INT * T1.W, T0.X, PS,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1
%num = load i8, ptr addrspace(1) %in
%den = load i8, ptr addrspace(1) %den_ptr
@@ -250,17 +1357,107 @@ define amdgpu_kernel void @urem24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in
ret void
}
-; FUNC-LABEL: {{^}}urem24_i16:
-; SI: v_cvt_f32_u32
-; SI: v_cvt_f32_u32
-; SI: v_rcp_iflag_f32
-; SI: v_cvt_u32_f32
-
-; EG: UINT_TO_FLT
-; EG-DAG: UINT_TO_FLT
-; EG-DAG: RECIP_IEEE
-; EG: FLT_TO_UINT
define amdgpu_kernel void @urem24_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: urem24_i16:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_mov_b32 s10, s6
+; SI-NEXT: s_mov_b32 s11, s7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s8, s2
+; SI-NEXT: s_mov_b32 s9, s3
+; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
+; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_cvt_f32_u32_e32 v2, v0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_cvt_f32_u32_e32 v3, v1
+; SI-NEXT: v_rcp_iflag_f32_e32 v4, v3
+; SI-NEXT: v_mul_f32_e32 v4, v2, v4
+; SI-NEXT: v_trunc_f32_e32 v4, v4
+; SI-NEXT: v_fma_f32 v2, -v4, v3, v2
+; SI-NEXT: v_cvt_u32_f32_e32 v4, v4
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, v3
+; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; SI-NEXT: v_mul_lo_u32 v1, v2, v1
+; SI-NEXT: v_subrev_i32_e32 v0, vcc, v1, v0
+; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: urem24_i16:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_mov_b32 s10, s6
+; VI-NEXT: s_mov_b32 s11, s7
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s8, s2
+; VI-NEXT: s_mov_b32 s9, s3
+; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
+; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0
+; VI-NEXT: s_mov_b32 s4, s0
+; VI-NEXT: s_mov_b32 s5, s1
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_cvt_f32_u32_e32 v2, v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_cvt_f32_u32_e32 v3, v1
+; VI-NEXT: v_rcp_iflag_f32_e32 v4, v2
+; VI-NEXT: v_mul_f32_e32 v4, v3, v4
+; VI-NEXT: v_trunc_f32_e32 v4, v4
+; VI-NEXT: v_cvt_u32_f32_e32 v5, v4
+; VI-NEXT: v_mad_f32 v3, -v4, v2, v3
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, v2
+; VI-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc
+; VI-NEXT: v_mul_lo_u32 v0, v2, v0
+; VI-NEXT: v_subrev_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: urem24_i16:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 25, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X,
+; EG-NEXT: RECIP_IEEE * T0.Z, PS,
+; EG-NEXT: UINT_TO_FLT * T0.W, T0.X,
+; EG-NEXT: MUL_IEEE * T1.W, PS, T0.Z,
+; EG-NEXT: TRUNC * T1.W, PV.W,
+; EG-NEXT: MULADD_IEEE T0.W, -PV.W, T0.Y, T0.W,
+; EG-NEXT: TRUNC * T1.W, PV.W,
+; EG-NEXT: SETGE * T0.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.Y, T1.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.X,
+; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
+; EG-NEXT: SUB_INT * T1.W, T0.X, PS,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.W, PS, literal.x,
+; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
+; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i16, ptr addrspace(1) %in, i16 1
%num = load i16, ptr addrspace(1) %in, align 2
%den = load i16, ptr addrspace(1) %den_ptr, align 2
@@ -269,10 +1466,90 @@ define amdgpu_kernel void @urem24_i16(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}urem24_i32:
-; SI-NOT: v_rcp_f32
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @urem24_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: urem24_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s6, s4, 0xffffff
+; SI-NEXT: s_and_b32 s7, s5, 0xffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s6
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s7
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_mul_lo_u32 v0, v0, s5
+; SI-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
+; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: urem24_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s2, s5, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s2
+; VI-NEXT: s_and_b32 s2, s4, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_mul_lo_u32 v0, v0, s5
+; VI-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
+; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: urem24_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.X, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -285,14 +1562,105 @@ define amdgpu_kernel void @urem24_i32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}urem25_i32:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @urem25_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: urem25_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s2, s4, 0x1ffffff
+; SI-NEXT: s_and_b32 s4, s5, 0x1ffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: s_sub_i32 s5, 0, s4
+; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; SI-NEXT: v_mul_lo_u32 v1, s5, v0
+; SI-NEXT: v_mul_hi_u32 v1, v0, v1
+; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT: v_mul_hi_u32 v0, s2, v0
+; SI-NEXT: v_readfirstlane_b32 s5, v0
+; SI-NEXT: s_mul_i32 s5, s5, s4
+; SI-NEXT: s_sub_i32 s2, s2, s5
+; SI-NEXT: s_sub_i32 s5, s2, s4
+; SI-NEXT: s_cmp_ge_u32 s2, s4
+; SI-NEXT: s_cselect_b32 s2, s5, s2
+; SI-NEXT: s_sub_i32 s5, s2, s4
+; SI-NEXT: s_cmp_ge_u32 s2, s4
+; SI-NEXT: s_cselect_b32 s4, s5, s2
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: urem25_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; VI-NEXT: s_sub_i32 s3, 0, s4
+; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; VI-NEXT: v_mul_lo_u32 v1, s3, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_mul_hi_u32 v1, v0, v1
+; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: v_mul_hi_u32 v0, s5, v0
+; VI-NEXT: v_readfirstlane_b32 s6, v0
+; VI-NEXT: s_mul_i32 s6, s6, s4
+; VI-NEXT: s_sub_i32 s5, s5, s6
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b32 s5, s6, s5
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b32 s4, s6, s5
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: urem25_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.X, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -305,14 +1673,105 @@ define amdgpu_kernel void @urem25_i32(ptr addrspace(1) %out, ptr addrspace(1) %i
ret void
}
-; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_urem24_i32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: test_no_urem24_i32_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s2, s4, 0xffffff
+; SI-NEXT: s_and_b32 s4, s5, 0x1ffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: s_sub_i32 s5, 0, s4
+; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; SI-NEXT: v_mul_lo_u32 v1, s5, v0
+; SI-NEXT: v_mul_hi_u32 v1, v0, v1
+; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT: v_mul_hi_u32 v0, s2, v0
+; SI-NEXT: v_readfirstlane_b32 s5, v0
+; SI-NEXT: s_mul_i32 s5, s5, s4
+; SI-NEXT: s_sub_i32 s2, s2, s5
+; SI-NEXT: s_sub_i32 s5, s2, s4
+; SI-NEXT: s_cmp_ge_u32 s2, s4
+; SI-NEXT: s_cselect_b32 s2, s5, s2
+; SI-NEXT: s_sub_i32 s5, s2, s4
+; SI-NEXT: s_cmp_ge_u32 s2, s4
+; SI-NEXT: s_cselect_b32 s4, s5, s2
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: test_no_urem24_i32_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; VI-NEXT: s_sub_i32 s3, 0, s4
+; VI-NEXT: s_and_b32 s5, s2, 0xffffff
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; VI-NEXT: v_mul_lo_u32 v1, s3, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_mul_hi_u32 v1, v0, v1
+; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: v_mul_hi_u32 v0, s5, v0
+; VI-NEXT: v_readfirstlane_b32 s6, v0
+; VI-NEXT: s_mul_i32 s6, s6, s4
+; VI-NEXT: s_sub_i32 s5, s5, s6
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b32 s5, s6, s5
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b32 s4, s6, s5
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: test_no_urem24_i32_1:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.X, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -325,14 +1784,105 @@ define amdgpu_kernel void @test_no_urem24_i32_1(ptr addrspace(1) %out, ptr addrs
ret void
}
-; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
; RCP_IFLAG is for URECIP in the full 32b alg
-; SI: v_rcp_iflag
-; SI-NOT: v_rcp_f32
-
-; EG-NOT: UINT_TO_FLT
-; EG-NOT: RECIP_IEEE
define amdgpu_kernel void @test_no_urem24_i32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: test_no_urem24_i32_2:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s2, s4, 0x1ffffff
+; SI-NEXT: s_and_b32 s4, s5, 0xffffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: s_sub_i32 s5, 0, s4
+; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; SI-NEXT: v_mul_lo_u32 v1, s5, v0
+; SI-NEXT: v_mul_hi_u32 v1, v0, v1
+; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; SI-NEXT: v_mul_hi_u32 v0, s2, v0
+; SI-NEXT: v_readfirstlane_b32 s5, v0
+; SI-NEXT: s_mul_i32 s5, s5, s4
+; SI-NEXT: s_sub_i32 s2, s2, s5
+; SI-NEXT: s_sub_i32 s5, s2, s4
+; SI-NEXT: s_cmp_ge_u32 s2, s4
+; SI-NEXT: s_cselect_b32 s2, s5, s2
+; SI-NEXT: s_sub_i32 s5, s2, s4
+; SI-NEXT: s_cmp_ge_u32 s2, s4
+; SI-NEXT: s_cselect_b32 s4, s5, s2
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: test_no_urem24_i32_2:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s4, s3, 0xffffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; VI-NEXT: s_sub_i32 s3, 0, s4
+; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
+; VI-NEXT: v_cvt_u32_f32_e32 v0, v0
+; VI-NEXT: v_mul_lo_u32 v1, s3, v0
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_mul_hi_u32 v1, v0, v1
+; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; VI-NEXT: v_mul_hi_u32 v0, s5, v0
+; VI-NEXT: v_readfirstlane_b32 s6, v0
+; VI-NEXT: s_mul_i32 s6, s6, s4
+; VI-NEXT: s_sub_i32 s5, s5, s6
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b32 s5, s6, s5
+; VI-NEXT: s_sub_i32 s6, s5, s4
+; VI-NEXT: s_cmp_ge_u32 s5, s4
+; VI-NEXT: s_cselect_b32 s4, s6, s5
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: test_no_urem24_i32_2:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: SUB_INT T1.W, 0.0, PV.W,
+; EG-NEXT: RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS,
+; EG-NEXT: MULHI * T0.Z, T0.Y, PS,
+; EG-NEXT: ADD_INT T1.W, T0.Y, PS,
+; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00)
+; EG-NEXT: MULHI * T0.X, PS, PV.W,
+; EG-NEXT: MULLO_INT * T0.X, PS, T0.W,
+; EG-NEXT: SUB_INT * T1.W, T2.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS,
+; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W,
+; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -345,12 +1895,85 @@ define amdgpu_kernel void @test_no_urem24_i32_2(ptr addrspace(1) %out, ptr addrs
ret void
}
-; FUNC-LABEL: {{^}}test_udiv24_u16_u23_i32:
-; SI: v_rcp_iflag_f32
-; SI: v_and_b32_e32 v{{[0-9]+}}, 0x7fffff,
-
-; EG: RECIP_IEEE
define amdgpu_kernel void @test_udiv24_u16_u23_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: test_udiv24_u16_u23_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0xffff
+; SI-NEXT: s_and_b32 s5, s5, 0x7fffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: test_udiv24_u16_u23_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s3, s3, 0x7fffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s3
+; VI-NEXT: s_and_b32 s2, s2, 0xffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: test_udiv24_u16_u23_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: UINT_TO_FLT * T0.Y, PV.W,
+; EG-NEXT: AND_INT T0.W, T0.X, literal.x,
+; EG-NEXT: RECIP_IEEE * T0.X, PS,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: UINT_TO_FLT * T0.Z, PV.W,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: AND_INT T0.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8388607(1.175494e-38), 2(2.802597e-45)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
@@ -363,12 +1986,85 @@ define amdgpu_kernel void @test_udiv24_u16_u23_i32(ptr addrspace(1) %out, ptr ad
ret void
}
-; FUNC-LABEL: {{^}}test_udiv24_u23_u16_i32:
-; SI: v_rcp_iflag_f32
-; SI: v_and_b32_e32 v{{[0-9]+}}, 0x7fffff,
-
-; EG: RECIP_IEEE
define amdgpu_kernel void @test_udiv24_u23_u16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: test_udiv24_u23_u16_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_and_b32 s4, s4, 0x7fffff
+; SI-NEXT: s_and_b32 s5, s5, 0xffff
+; SI-NEXT: v_cvt_f32_u32_e32 v0, s4
+; SI-NEXT: v_cvt_f32_u32_e32 v1, s5
+; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1
+; SI-NEXT: v_mul_f32_e32 v2, v0, v2
+; SI-NEXT: v_trunc_f32_e32 v2, v2
+; SI-NEXT: v_fma_f32 v0, -v2, v1, v0
+; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
+; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1
+; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc
+; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: test_udiv24_u23_u16_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_and_b32 s3, s3, 0xffff
+; VI-NEXT: v_cvt_f32_u32_e32 v0, s3
+; VI-NEXT: s_and_b32 s2, s2, 0x7fffff
+; VI-NEXT: v_cvt_f32_u32_e32 v1, s2
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mul_f32_e32 v2, v1, v2
+; VI-NEXT: v_trunc_f32_e32 v2, v2
+; VI-NEXT: v_cvt_u32_f32_e32 v3, v2
+; VI-NEXT: v_mad_f32 v1, -v2, v0, v1
+; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0
+; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc
+; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
+;
+; EG-LABEL: test_udiv24_u23_u16_i32:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, KC0[2].Z,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: UINT_TO_FLT * T0.Y, PV.W,
+; EG-NEXT: AND_INT T0.W, T0.X, literal.x,
+; EG-NEXT: RECIP_IEEE * T0.X, PS,
+; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00)
+; EG-NEXT: UINT_TO_FLT * T0.Z, PV.W,
+; EG-NEXT: MUL_IEEE * T0.W, PS, T0.X,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.Z,
+; EG-NEXT: TRUNC * T0.W, PV.W,
+; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y,
+; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x,
+; EG-NEXT: FLT_TO_UINT * T0.X, T0.W,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: AND_INT T0.X, PV.W, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8388607(1.175494e-38), 2(2.802597e-45)
%den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%num = load i32, ptr addrspace(1) %in, align 4
%den = load i32, ptr addrspace(1) %den_ptr, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll b/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll
new file mode 100644
index 0000000..ea12732
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll
@@ -0,0 +1,16 @@
+; RUN: not llc -mcpu=gfx1100 -mtriple=amdgcn-amd-amdhsa -stress-regalloc=4 -filetype=null -verify-machineinstrs %s 2>&1 | FileCheck %s
+
+; CHECK: error: <unknown>:0:0: ran out of registers during register allocation in function 'f'
+; CHECK-NOT: Bad machine code
+
+define <16 x half> @f(i1 %LGV2, <16 x half> %0) {
+BB:
+ br i1 %LGV2, label %SW_C3, label %SW_C
+
+SW_C: ; preds = %BB
+ %B1 = fmul <16 x half> %0, zeroinitializer
+ ret <16 x half> %B1
+
+SW_C3: ; preds = %BB
+ ret <16 x half> <half 0xH0000, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison>
+}
diff --git a/llvm/test/CodeGen/ARM/cmp-select-sign.ll b/llvm/test/CodeGen/ARM/cmp-select-sign.ll
index 298a623..61cdc3b 100644
--- a/llvm/test/CodeGen/ARM/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/ARM/cmp-select-sign.ll
@@ -75,31 +75,31 @@ define i4 @sign_i4(i4 %a) {
define i8 @sign_i8(i8 %a) {
; ARM-LABEL: sign_i8:
; ARM: @ %bb.0:
-; ARM-NEXT: lsl r0, r0, #24
+; ARM-NEXT: sxtb r0, r0
; ARM-NEXT: mov r1, #1
-; ARM-NEXT: orr r0, r1, r0, asr #31
+; ARM-NEXT: orr r0, r1, r0, asr #7
; ARM-NEXT: bx lr
;
; THUMB-LABEL: sign_i8:
; THUMB: @ %bb.0:
-; THUMB-NEXT: lsls r0, r0, #24
-; THUMB-NEXT: asrs r1, r0, #31
+; THUMB-NEXT: sxtb r0, r0
+; THUMB-NEXT: asrs r1, r0, #7
; THUMB-NEXT: movs r0, #1
; THUMB-NEXT: orrs r0, r1
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: sign_i8:
; THUMB2: @ %bb.0:
-; THUMB2-NEXT: lsls r0, r0, #24
+; THUMB2-NEXT: sxtb r0, r0
; THUMB2-NEXT: movs r1, #1
-; THUMB2-NEXT: orr.w r0, r1, r0, asr #31
+; THUMB2-NEXT: orr.w r0, r1, r0, asr #7
; THUMB2-NEXT: bx lr
;
; THUMBV8-LABEL: sign_i8:
; THUMBV8: @ %bb.0:
-; THUMBV8-NEXT: lsls r0, r0, #24
+; THUMBV8-NEXT: sxtb r0, r0
; THUMBV8-NEXT: movs r1, #1
-; THUMBV8-NEXT: orr.w r0, r1, r0, asr #31
+; THUMBV8-NEXT: orr.w r0, r1, r0, asr #7
; THUMBV8-NEXT: bx lr
%c = icmp sgt i8 %a, -1
%res = select i1 %c, i8 1, i8 -1
@@ -109,31 +109,31 @@ define i8 @sign_i8(i8 %a) {
define i16 @sign_i16(i16 %a) {
; ARM-LABEL: sign_i16:
; ARM: @ %bb.0:
-; ARM-NEXT: lsl r0, r0, #16
+; ARM-NEXT: sxth r0, r0
; ARM-NEXT: mov r1, #1
-; ARM-NEXT: orr r0, r1, r0, asr #31
+; ARM-NEXT: orr r0, r1, r0, asr #15
; ARM-NEXT: bx lr
;
; THUMB-LABEL: sign_i16:
; THUMB: @ %bb.0:
-; THUMB-NEXT: lsls r0, r0, #16
-; THUMB-NEXT: asrs r1, r0, #31
+; THUMB-NEXT: sxth r0, r0
+; THUMB-NEXT: asrs r1, r0, #15
; THUMB-NEXT: movs r0, #1
; THUMB-NEXT: orrs r0, r1
; THUMB-NEXT: bx lr
;
; THUMB2-LABEL: sign_i16:
; THUMB2: @ %bb.0:
-; THUMB2-NEXT: lsls r0, r0, #16
+; THUMB2-NEXT: sxth r0, r0
; THUMB2-NEXT: movs r1, #1
-; THUMB2-NEXT: orr.w r0, r1, r0, asr #31
+; THUMB2-NEXT: orr.w r0, r1, r0, asr #15
; THUMB2-NEXT: bx lr
;
; THUMBV8-LABEL: sign_i16:
; THUMBV8: @ %bb.0:
-; THUMBV8-NEXT: lsls r0, r0, #16
+; THUMBV8-NEXT: sxth r0, r0
; THUMBV8-NEXT: movs r1, #1
-; THUMBV8-NEXT: orr.w r0, r1, r0, asr #31
+; THUMBV8-NEXT: orr.w r0, r1, r0, asr #15
; THUMBV8-NEXT: bx lr
%c = icmp sgt i16 %a, -1
%res = select i1 %c, i16 1, i16 -1
diff --git a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll
index cda1e83..aa3cdc3 100644
--- a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll
+++ b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll
@@ -1,10 +1,10 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
-;CHECK: _foo
-;CHECK-NOT: vld1.32
-;CHECK-NOT: vst1.32
-;CHECK: bx
define void @foo(ptr %J) {
+; CHECK-LABEL: foo:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: bx lr
%A = load <16 x i8>, ptr %J
%T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%T2 = shufflevector <8 x i8> %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/Generic/fp128-exp10-libcall.ll b/llvm/test/CodeGen/Generic/fp128-exp10-libcall.ll
new file mode 100644
index 0000000..5e97f03
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/fp128-exp10-libcall.ll
@@ -0,0 +1,28 @@
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-musl | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-none | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ not llc -mtriple=arm64-apple-macosx -filetype=null %s 2>&1 | FileCheck --check-prefix=ERR %s %}
+; RUN: %if arm-registered-target %{ llc < %s -mtriple=arm-none-eabi | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if arm-registered-target %{ llc < %s -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128 %}
+; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128 %}
+; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-musl | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128 %}
+; RUN: %if riscv-registered-target %{ llc < %s -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if systemz-registered-target %{ llc < %s -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-S390X %}
+; RUN: %if x86-registered-target %{ llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128 %}
+; RUN: %if x86-registered-target %{ llc < %s -mtriple=i686-unknown-linux-musl | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if x86-registered-target %{ llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128 %}
+; RUN: %if x86-registered-target %{ llc < %s -mtriple=x86_64-unknown-linux-musl | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN %if x86-registered-target %{ llc < %s -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128 %}
+
+; ERR: error: no libcall available for fexp10
+define fp128 @test_exp10(fp128 %a) {
+; CHECK-ALL-LABEL: test_exp10:
+; CHECK-F128: exp10f128
+; CHECK-USELD: exp10l
+; CHECK-S390X: exp10l
+start:
+ %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+ ret fp128 %0
+}
+
diff --git a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
index ccce4bbd..f759c94 100644
--- a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
+++ b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
@@ -95,16 +95,6 @@ start:
ret fp128 %0
}
-define fp128 @test_exp10(fp128 %a) {
-; CHECK-ALL-LABEL: test_exp10:
-; CHECK-F128: exp10f128
-; CHECK-USELD: exp10l
-; CHECK-S390X: exp10l
-start:
- %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
- ret fp128 %0
-}
-
define fp128 @test_exp2(fp128 %a) {
; CHECK-ALL-LABEL: test_exp2:
; CHECK-F128: exp2f128
diff --git a/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
new file mode 100644
index 0000000..9988d5b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+%struct.double2 = type { double, double }
+
+declare %struct.double2 @add(ptr align(16) byval(%struct.double2), ptr align(16) byval(%struct.double2))
+
+define void @call_byval(ptr %out, ptr %in1, ptr %in2) {
+; CHECK-LABEL: call_byval(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<12>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [call_byval_param_0];
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .align 16 .b8 param0[16];
+; CHECK-NEXT: .param .align 16 .b8 param1[16];
+; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: ld.param.b64 %rd2, [call_byval_param_2];
+; CHECK-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd2];
+; CHECK-NEXT: st.param.v2.b64 [param1], {%rd3, %rd4};
+; CHECK-NEXT: ld.param.b64 %rd5, [call_byval_param_1];
+; CHECK-NEXT: ld.v2.b64 {%rd6, %rd7}, [%rd5];
+; CHECK-NEXT: st.param.v2.b64 [param0], {%rd6, %rd7};
+; CHECK-NEXT: call.uni (retval0), add, (param0, param1);
+; CHECK-NEXT: ld.param.b64 %rd8, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd9, [retval0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: st.b64 [%rd1+8], %rd8;
+; CHECK-NEXT: st.b64 [%rd1], %rd9;
+; CHECK-NEXT: ret;
+ %call = call %struct.double2 @add(ptr align(16) byval(%struct.double2) %in1, ptr align(16) byval(%struct.double2) %in2)
+ store %struct.double2 %call, ptr %out, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
index 48209a8..dd3e4ec 100644
--- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
@@ -12,14 +12,14 @@ define %struct.64 @test_return_type_mismatch(ptr %p) {
; CHECK-NEXT: .reg .b64 %rd<40>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_return_type_mismatch_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: .param .align 1 .b8 retval0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: mov.b64 %rd1, callee;
-; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0;
+; CHECK-NEXT: mov.b64 %rd2, callee;
+; CHECK-NEXT: call (retval0), %rd2, (param0), prototype_0;
; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
@@ -90,16 +90,16 @@ define i64 @test_param_count_mismatch(ptr %p) {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_param_0];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: .param .b64 retval0;
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
; CHECK-NEXT: st.param.b64 [param1], 7;
-; CHECK-NEXT: mov.b64 %rd1, callee;
-; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2;
+; CHECK-NEXT: mov.b64 %rd2, callee;
+; CHECK-NEXT: call (retval0), %rd2, (param0, param1), prototype_2;
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
@@ -114,14 +114,14 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
; CHECK-NEXT: .reg .b64 %rd<40>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_return_type_mismatch_variadic_param_0];
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .b64 param0;
; CHECK-NEXT: .param .align 1 .b8 retval0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: mov.b64 %rd1, callee_variadic;
-; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3;
+; CHECK-NEXT: mov.b64 %rd2, callee_variadic;
+; CHECK-NEXT: call (retval0), %rd2, (param0), prototype_3;
; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 38185c7b..045704b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -124,15 +124,15 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-NEXT: .reg .b64 %rd<4>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
-; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
+; PTX-NEXT: mov.b64 %rd1, grid_const_escape_param_0;
+; PTX-NEXT: cvta.param.u64 %rd2, %rd1;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: st.param.b64 [param0], %rd3;
+; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT: mov.b64 %rd1, escape;
-; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
+; PTX-NEXT: mov.b64 %rd3, escape;
+; PTX-NEXT: call (retval0), %rd3, (param0), prototype_0;
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
@@ -157,25 +157,25 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %SPL, __local_depot4;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0;
+; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0;
; PTX-NEXT: ld.param.b32 %r1, [multiple_grid_const_escape_param_1];
-; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2;
-; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
-; PTX-NEXT: cvta.param.u64 %rd5, %rd2;
-; PTX-NEXT: add.u64 %rd6, %SP, 0;
-; PTX-NEXT: add.u64 %rd7, %SPL, 0;
-; PTX-NEXT: st.local.b32 [%rd7], %r1;
+; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2;
+; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd4, %rd1;
+; PTX-NEXT: add.u64 %rd5, %SP, 0;
+; PTX-NEXT: add.u64 %rd6, %SPL, 0;
+; PTX-NEXT: st.local.b32 [%rd6], %r1;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: .param .b64 param1;
; PTX-NEXT: .param .b64 param2;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: st.param.b64 [param2], %rd4;
-; PTX-NEXT: st.param.b64 [param1], %rd6;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
+; PTX-NEXT: st.param.b64 [param2], %rd3;
+; PTX-NEXT: st.param.b64 [param1], %rd5;
+; PTX-NEXT: st.param.b64 [param0], %rd4;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
-; PTX-NEXT: mov.b64 %rd1, escape3;
-; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
+; PTX-NEXT: mov.b64 %rd7, escape3;
+; PTX-NEXT: call (retval0), %rd7, (param0, param1, param2), prototype_1;
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
@@ -256,20 +256,20 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0;
-; PTX-NEXT: ld.param.b64 %rd3, [grid_const_partial_escape_param_1];
-; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
-; PTX-NEXT: cvta.param.u64 %rd5, %rd2;
+; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escape_param_0;
+; PTX-NEXT: ld.param.b64 %rd2, [grid_const_partial_escape_param_1];
+; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd4, %rd1;
; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0];
; PTX-NEXT: add.s32 %r2, %r1, %r1;
-; PTX-NEXT: st.global.b32 [%rd4], %r2;
+; PTX-NEXT: st.global.b32 [%rd3], %r2;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
+; PTX-NEXT: st.param.b64 [param0], %rd4;
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT: mov.b64 %rd1, escape;
-; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
+; PTX-NEXT: mov.b64 %rd5, escape;
+; PTX-NEXT: call (retval0), %rd5, (param0), prototype_2;
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
@@ -295,21 +295,21 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0;
-; PTX-NEXT: ld.param.b64 %rd3, [grid_const_partial_escapemem_param_1];
-; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
-; PTX-NEXT: cvta.param.u64 %rd5, %rd2;
+; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escapemem_param_0;
+; PTX-NEXT: ld.param.b64 %rd2, [grid_const_partial_escapemem_param_1];
+; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd4, %rd1;
; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escapemem_param_0];
; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4];
-; PTX-NEXT: st.global.b64 [%rd4], %rd5;
+; PTX-NEXT: st.global.b64 [%rd3], %rd4;
; PTX-NEXT: add.s32 %r3, %r1, %r2;
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
+; PTX-NEXT: st.param.b64 [param0], %rd4;
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
-; PTX-NEXT: mov.b64 %rd1, escape;
-; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
+; PTX-NEXT: mov.b64 %rd5, escape;
+; PTX-NEXT: call (retval0), %rd5, (param0), prototype_3;
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
index a592b82..51f6b00 100644
--- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
+++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
@@ -150,8 +150,8 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
; CHECK: )
; CHECK: .param .align 16 .b8 param0[12];
; CHECK: .param .align 16 .b8 retval0[12];
- ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
+ ; CHECK-DAG: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.b32 [param0+8], {{%r[0-9]+}};
; CHECK: call.uni (retval0), callee_St4x3, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
@@ -240,8 +240,8 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
; CHECK: )
; CHECK: .param .align 16 .b8 param0[20];
; CHECK: .param .align 16 .b8 retval0[20];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
+ ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.b32 [param0+16], {{%r[0-9]+}};
; CHECK: call.uni (retval0), callee_St4x5, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
@@ -296,8 +296,8 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
; CHECK: )
; CHECK: .param .align 16 .b8 param0[24];
; CHECK: .param .align 16 .b8 retval0[24];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x6, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -358,9 +358,9 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
; CHECK: )
; CHECK: .param .align 16 .b8 param0[28];
; CHECK: .param .align 16 .b8 retval0[28];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
+ ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.b32 [param0+24], {{%r[0-9]+}};
; CHECK: call.uni (retval0), callee_St4x7, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -566,8 +566,8 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
; CHECK: )
; CHECK: .param .align 16 .b8 param0[24];
; CHECK: .param .align 16 .b8 retval0[24];
- ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
- ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
+ ; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
+ ; CHECK-DAG: st.param.b64 [param0+16], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), callee_St8x3, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll b/llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll
new file mode 100644
index 0000000..c119da6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: llc --verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK64
+
+define ptr @lower_args(ptr %_0, i32 %0, i32 %1, i32 %2, i32 %3, ptr %4, ptr %5, i64 %6, i24 %7) {
+; CHECK-LABEL: lower_args:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: blr
+entry:
+ ret ptr %_0
+}
+
+define i32 @lower_args_withops_zeroext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i24 %i) {
+; CHECK32-LABEL: lower_args_withops_zeroext:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: lwz r3, 56(r1)
+; CHECK32-NEXT: addi r3, r3, 255
+; CHECK32-NEXT: clrlwi r3, r3, 8
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: lower_args_withops_zeroext:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: lwz r3, 116(r1)
+; CHECK64-NEXT: addi r3, r3, 255
+; CHECK64-NEXT: clrldi r3, r3, 40
+; CHECK64-NEXT: blr
+entry:
+ %0 = add i24 %i, 255
+ %1 = zext i24 %0 to i32
+ ret i32 %1
+}
+
+define i32 @lower_args_withops_signext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i24 signext %i) {
+; CHECK32-LABEL: lower_args_withops_signext:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: lwz r3, 56(r1)
+; CHECK32-NEXT: slwi r3, r3, 8
+; CHECK32-NEXT: srawi r3, r3, 8
+; CHECK32-NEXT: slwi r3, r3, 8
+; CHECK32-NEXT: addi r3, r3, 22272
+; CHECK32-NEXT: srawi r3, r3, 8
+; CHECK32-NEXT: blr
+;
+; CHECK64-LABEL: lower_args_withops_signext:
+; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: lwz r3, 116(r1)
+; CHECK64-NEXT: slwi r3, r3, 8
+; CHECK64-NEXT: srawi r3, r3, 8
+; CHECK64-NEXT: addi r3, r3, 87
+; CHECK64-NEXT: sldi r3, r3, 40
+; CHECK64-NEXT: sradi r3, r3, 40
+; CHECK64-NEXT: blr
+entry:
+ %0 = add i24 %i, 87
+ %1 = sext i24 %0 to i32
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 9f62477..af0942e 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -56,155 +56,153 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: .cfi_offset v29, -240
; CHECK-NEXT: .cfi_offset v30, -224
; CHECK-NEXT: .cfi_offset v31, -208
+; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 2, 728(1)
+; CHECK-NEXT: ld 14, 688(1)
+; CHECK-NEXT: ld 11, 704(1)
+; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 21, 5
+; CHECK-NEXT: lwa 5, 0(7)
+; CHECK-NEXT: ld 7, 720(1)
; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill
; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill
-; CHECK-NEXT: mr 22, 5
-; CHECK-NEXT: ld 5, 848(1)
+; CHECK-NEXT: mr 22, 6
+; CHECK-NEXT: ld 6, 848(1)
; CHECK-NEXT: addi 3, 3, 1
-; CHECK-NEXT: mr 11, 7
-; CHECK-NEXT: ld 23, 688(1)
-; CHECK-NEXT: ld 7, 728(1)
+; CHECK-NEXT: ld 15, 736(1)
; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill
; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill
-; CHECK-NEXT: mr 18, 6
-; CHECK-NEXT: li 6, 9
; CHECK-NEXT: ld 19, 768(1)
-; CHECK-NEXT: ld 2, 760(1)
-; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill
-; CHECK-NEXT: cmpldi 3, 9
-; CHECK-NEXT: ld 27, 816(1)
-; CHECK-NEXT: ld 26, 808(1)
-; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 15, 736(1)
-; CHECK-NEXT: lxv 39, 0(8)
+; CHECK-NEXT: ld 18, 760(1)
; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill
; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 30, 704(1)
-; CHECK-NEXT: lxv 38, 0(9)
-; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 21, 784(1)
+; CHECK-NEXT: ld 12, 696(1)
+; CHECK-NEXT: lxv 0, 0(9)
+; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 1, 0(8)
+; CHECK-NEXT: cmpldi 3, 9
+; CHECK-NEXT: ld 30, 824(1)
+; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 29, 840(1)
+; CHECK-NEXT: ld 28, 832(1)
+; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 23, 784(1)
; CHECK-NEXT: ld 20, 776(1)
; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill
; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill
-; CHECK-NEXT: iselgt 3, 3, 6
-; CHECK-NEXT: ld 6, 720(1)
+; CHECK-NEXT: ld 25, 800(1)
; CHECK-NEXT: ld 24, 792(1)
-; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 27, 816(1)
+; CHECK-NEXT: ld 26, 808(1)
+; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 17, 752(1)
+; CHECK-NEXT: extswsli 9, 5, 3
+; CHECK-NEXT: lxv 4, 0(14)
+; CHECK-NEXT: std 14, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 12, 40(1) # 8-byte Folded Spill
+; CHECK-NEXT: mulli 0, 5, 40
+; CHECK-NEXT: sldi 14, 5, 5
+; CHECK-NEXT: mulli 31, 5, 24
+; CHECK-NEXT: lxv 38, 0(2)
+; CHECK-NEXT: lxv 2, 0(11)
+; CHECK-NEXT: std 2, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT: mulli 2, 5, 48
+; CHECK-NEXT: sldi 5, 5, 4
+; CHECK-NEXT: ld 16, 744(1)
+; CHECK-NEXT: lxv 5, 0(10)
+; CHECK-NEXT: std 6, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: ld 6, 712(1)
+; CHECK-NEXT: mr 10, 7
+; CHECK-NEXT: add 7, 14, 21
+; CHECK-NEXT: lxv 13, 0(19)
+; CHECK-NEXT: std 8, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 8, 11
+; CHECK-NEXT: li 11, 9
+; CHECK-NEXT: iselgt 3, 3, 11
; CHECK-NEXT: addi 3, 3, -2
-; CHECK-NEXT: lxv 6, 0(19)
-; CHECK-NEXT: lxv 11, 0(7)
-; CHECK-NEXT: std 5, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, 40(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 6, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 5, 840(1)
-; CHECK-NEXT: lxv 12, 0(6)
-; CHECK-NEXT: rldicl 12, 3, 61, 3
+; CHECK-NEXT: rldicl 11, 3, 61, 3
+; CHECK-NEXT: lxv 3, 0(12)
+; CHECK-NEXT: lxv 40, 0(6)
+; CHECK-NEXT: std 18, 112(1) # 8-byte Folded Spill
; CHECK-NEXT: std 19, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT: add 19, 21, 5
+; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxv 39, 0(10)
+; CHECK-NEXT: addi 3, 7, 32
+; CHECK-NEXT: add 12, 31, 21
; CHECK-NEXT: std 20, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 21, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 4, 0(21)
-; CHECK-NEXT: ld 25, 800(1)
-; CHECK-NEXT: lxv 33, 0(10)
-; CHECK-NEXT: lxv 32, 0(23)
-; CHECK-NEXT: lxv 36, 0(30)
-; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 17, 752(1)
-; CHECK-NEXT: ld 16, 744(1)
-; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 29, 712(1)
-; CHECK-NEXT: ld 28, 696(1)
-; CHECK-NEXT: std 8, 56(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 37, 0(28)
-; CHECK-NEXT: lxv 13, 0(29)
-; CHECK-NEXT: mr 8, 29
-; CHECK-NEXT: mr 9, 30
-; CHECK-NEXT: mr 10, 28
-; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 33, 0(15)
+; CHECK-NEXT: lxv 32, 0(16)
; CHECK-NEXT: std 26, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 10, 0(15)
-; CHECK-NEXT: lxv 9, 0(16)
-; CHECK-NEXT: li 28, 1
-; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 8, 0(17)
-; CHECK-NEXT: lxv 7, 0(2)
+; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 37, 0(17)
+; CHECK-NEXT: lxv 36, 0(18)
+; CHECK-NEXT: std 30, 176(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 28, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 12, 0(20)
+; CHECK-NEXT: lxv 11, 0(23)
+; CHECK-NEXT: add 20, 21, 9
; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill
; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 5, 0(20)
-; CHECK-NEXT: lxv 3, 0(24)
+; CHECK-NEXT: lxv 10, 0(24)
+; CHECK-NEXT: lxv 9, 0(25)
; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill
; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 2, 0(25)
-; CHECK-NEXT: lxv 1, 0(26)
+; CHECK-NEXT: lxv 8, 0(26)
+; CHECK-NEXT: lxv 7, 0(27)
+; CHECK-NEXT: addi 12, 12, 32
+; CHECK-NEXT: li 27, 0
+; CHECK-NEXT: mr 26, 21
; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill
-; CHECK-NEXT: lxv 0, 0(27)
+; CHECK-NEXT: lxv 6, 0(30)
+; CHECK-NEXT: lxv 41, 0(28)
+; CHECK-NEXT: addi 7, 11, 1
+; CHECK-NEXT: add 11, 0, 21
+; CHECK-NEXT: li 28, 1
; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill
+; CHECK-NEXT: lxv 43, 0(29)
+; CHECK-NEXT: lxv 42, 0(5)
; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill
+; CHECK-NEXT: addi 11, 11, 32
; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 5, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 5, 832(1)
; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill
; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill
-; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill
; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill
; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 2, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 5, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 5, 824(1)
-; CHECK-NEXT: std 5, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT: lwa 5, 0(11)
-; CHECK-NEXT: li 27, 0
-; CHECK-NEXT: ld 7, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT: mulli 6, 5, 40
-; CHECK-NEXT: sldi 0, 5, 4
-; CHECK-NEXT: extswsli 14, 5, 3
-; CHECK-NEXT: lxv 40, 0(7)
-; CHECK-NEXT: ld 7, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT: add 31, 14, 22
-; CHECK-NEXT: add 11, 0, 22
-; CHECK-NEXT: mr 26, 22
-; CHECK-NEXT: addi 3, 11, 32
-; CHECK-NEXT: addi 11, 12, 1
-; CHECK-NEXT: mulli 12, 5, 48
-; CHECK-NEXT: addi 31, 31, 32
-; CHECK-NEXT: add 19, 22, 6
-; CHECK-NEXT: sldi 6, 5, 5
-; CHECK-NEXT: mulli 5, 5, 24
-; CHECK-NEXT: lxv 41, 0(7)
-; CHECK-NEXT: add 20, 22, 6
-; CHECK-NEXT: add 21, 22, 5
-; CHECK-NEXT: ld 5, 192(1) # 8-byte Folded Reload
-; CHECK-NEXT: lxv 43, 0(5)
-; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload
-; CHECK-NEXT: lxv 42, 0(5)
+; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_4 Depth 2
-; CHECK-NEXT: maddld 5, 12, 27, 0
-; CHECK-NEXT: mr 6, 18
-; CHECK-NEXT: mr 29, 21
+; CHECK-NEXT: maddld 5, 2, 27, 0
+; CHECK-NEXT: mr 6, 22
; CHECK-NEXT: mr 30, 20
-; CHECK-NEXT: mr 2, 19
-; CHECK-NEXT: mtctr 11
-; CHECK-NEXT: add 25, 22, 5
-; CHECK-NEXT: maddld 5, 12, 27, 14
-; CHECK-NEXT: add 24, 22, 5
+; CHECK-NEXT: mr 29, 19
+; CHECK-NEXT: mtctr 7
+; CHECK-NEXT: add 25, 21, 5
+; CHECK-NEXT: maddld 5, 2, 27, 14
+; CHECK-NEXT: add 24, 21, 5
+; CHECK-NEXT: maddld 5, 2, 27, 31
+; CHECK-NEXT: add 23, 21, 5
; CHECK-NEXT: mr 5, 26
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_4: # %_loop_2_do_
@@ -212,66 +210,66 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lxvp 34, 0(6)
; CHECK-NEXT: lxvp 44, 0(5)
-; CHECK-NEXT: xvmaddadp 39, 45, 35
-; CHECK-NEXT: lxvp 46, 0(24)
-; CHECK-NEXT: xvmaddadp 38, 47, 35
-; CHECK-NEXT: lxvp 48, 0(25)
-; CHECK-NEXT: lxvp 50, 0(29)
-; CHECK-NEXT: lxvp 62, 0(30)
-; CHECK-NEXT: lxvp 60, 0(2)
+; CHECK-NEXT: xvmaddadp 1, 45, 35
+; CHECK-NEXT: lxvp 46, 0(30)
+; CHECK-NEXT: xvmaddadp 0, 47, 35
+; CHECK-NEXT: lxvp 48, 0(29)
+; CHECK-NEXT: lxvp 50, 0(23)
+; CHECK-NEXT: lxvp 62, 0(24)
+; CHECK-NEXT: lxvp 60, 0(25)
; CHECK-NEXT: lxvp 58, 32(6)
; CHECK-NEXT: lxvp 56, 32(5)
-; CHECK-NEXT: lxvp 54, 32(24)
-; CHECK-NEXT: lxvp 52, 32(25)
-; CHECK-NEXT: lxvp 30, 32(29)
-; CHECK-NEXT: lxvp 28, 32(30)
-; CHECK-NEXT: lxvp 26, 32(2)
-; CHECK-NEXT: xvmaddadp 33, 49, 35
-; CHECK-NEXT: xvmaddadp 32, 51, 35
-; CHECK-NEXT: xvmaddadp 37, 63, 35
-; CHECK-NEXT: xvmaddadp 36, 61, 35
-; CHECK-NEXT: xvmaddadp 13, 44, 34
-; CHECK-NEXT: xvmaddadp 12, 46, 34
-; CHECK-NEXT: xvmaddadp 11, 48, 34
-; CHECK-NEXT: xvmaddadp 10, 50, 34
-; CHECK-NEXT: xvmaddadp 9, 62, 34
-; CHECK-NEXT: xvmaddadp 8, 60, 34
-; CHECK-NEXT: xvmaddadp 7, 57, 59
-; CHECK-NEXT: xvmaddadp 6, 55, 59
-; CHECK-NEXT: xvmaddadp 5, 53, 59
-; CHECK-NEXT: xvmaddadp 4, 31, 59
-; CHECK-NEXT: xvmaddadp 3, 29, 59
-; CHECK-NEXT: xvmaddadp 2, 27, 59
-; CHECK-NEXT: xvmaddadp 1, 56, 58
-; CHECK-NEXT: xvmaddadp 0, 54, 58
-; CHECK-NEXT: xvmaddadp 40, 52, 58
+; CHECK-NEXT: lxvp 54, 32(30)
+; CHECK-NEXT: lxvp 52, 32(29)
+; CHECK-NEXT: lxvp 30, 32(23)
+; CHECK-NEXT: lxvp 28, 32(24)
+; CHECK-NEXT: lxvp 26, 32(25)
+; CHECK-NEXT: xvmaddadp 5, 49, 35
+; CHECK-NEXT: xvmaddadp 4, 51, 35
+; CHECK-NEXT: xvmaddadp 3, 63, 35
+; CHECK-NEXT: xvmaddadp 2, 61, 35
+; CHECK-NEXT: xvmaddadp 40, 44, 34
+; CHECK-NEXT: xvmaddadp 39, 46, 34
+; CHECK-NEXT: xvmaddadp 38, 48, 34
+; CHECK-NEXT: xvmaddadp 33, 50, 34
+; CHECK-NEXT: xvmaddadp 32, 62, 34
+; CHECK-NEXT: xvmaddadp 37, 60, 34
+; CHECK-NEXT: xvmaddadp 36, 57, 59
+; CHECK-NEXT: xvmaddadp 13, 55, 59
+; CHECK-NEXT: xvmaddadp 12, 53, 59
+; CHECK-NEXT: xvmaddadp 11, 31, 59
+; CHECK-NEXT: xvmaddadp 10, 29, 59
+; CHECK-NEXT: xvmaddadp 9, 27, 59
+; CHECK-NEXT: xvmaddadp 8, 56, 58
+; CHECK-NEXT: xvmaddadp 7, 54, 58
+; CHECK-NEXT: xvmaddadp 6, 52, 58
; CHECK-NEXT: xvmaddadp 41, 30, 58
; CHECK-NEXT: xvmaddadp 43, 28, 58
; CHECK-NEXT: xvmaddadp 42, 26, 58
; CHECK-NEXT: addi 6, 6, 64
; CHECK-NEXT: addi 5, 5, 64
+; CHECK-NEXT: addi 30, 30, 64
+; CHECK-NEXT: addi 29, 29, 64
+; CHECK-NEXT: addi 23, 23, 64
; CHECK-NEXT: addi 24, 24, 64
; CHECK-NEXT: addi 25, 25, 64
-; CHECK-NEXT: addi 29, 29, 64
-; CHECK-NEXT: addi 30, 30, 64
-; CHECK-NEXT: addi 2, 2, 64
; CHECK-NEXT: bdnz .LBB0_4
; CHECK-NEXT: # %bb.5: # %_loop_2_endl_
; CHECK-NEXT: #
; CHECK-NEXT: addi 28, 28, 6
-; CHECK-NEXT: add 26, 26, 12
-; CHECK-NEXT: add 31, 31, 12
-; CHECK-NEXT: add 19, 19, 12
-; CHECK-NEXT: add 3, 3, 12
-; CHECK-NEXT: add 20, 20, 12
-; CHECK-NEXT: add 21, 21, 12
+; CHECK-NEXT: add 26, 26, 2
+; CHECK-NEXT: add 20, 20, 2
+; CHECK-NEXT: add 11, 11, 2
+; CHECK-NEXT: add 19, 19, 2
+; CHECK-NEXT: add 3, 3, 2
+; CHECK-NEXT: add 12, 12, 2
; CHECK-NEXT: addi 27, 27, 1
; CHECK-NEXT: cmpld 28, 4
; CHECK-NEXT: ble 0, .LBB0_3
; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
-; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload
; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload
-; CHECK-NEXT: stxv 39, 0(3)
+; CHECK-NEXT: stxv 1, 0(3)
; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload
; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload
@@ -284,7 +282,7 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload
-; CHECK-NEXT: stxv 38, 0(3)
+; CHECK-NEXT: stxv 0, 0(3)
; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload
@@ -297,8 +295,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 33, 0(3)
-; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 5, 0(3)
+; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload
@@ -310,40 +308,41 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 32, 0(3)
-; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 37, 0(10)
-; CHECK-NEXT: stxv 36, 0(9)
-; CHECK-NEXT: stxv 13, 0(8)
+; CHECK-NEXT: stxv 4, 0(3)
+; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 12, 0(3)
+; CHECK-NEXT: stxv 3, 0(3)
+; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 2, 0(8)
+; CHECK-NEXT: stxv 40, 0(3)
; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 11, 0(3)
+; CHECK-NEXT: stxv 39, 0(10)
+; CHECK-NEXT: stxv 38, 0(3)
; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 10, 0(3)
+; CHECK-NEXT: stxv 33, 0(3)
; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 9, 0(3)
+; CHECK-NEXT: stxv 32, 0(3)
; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 8, 0(3)
+; CHECK-NEXT: stxv 37, 0(3)
; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 7, 0(3)
+; CHECK-NEXT: stxv 36, 0(3)
; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 6, 0(3)
+; CHECK-NEXT: stxv 13, 0(3)
; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 5, 0(3)
+; CHECK-NEXT: stxv 12, 0(3)
; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 4, 0(3)
+; CHECK-NEXT: stxv 11, 0(3)
; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 3, 0(3)
+; CHECK-NEXT: stxv 10, 0(3)
; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 2, 0(3)
+; CHECK-NEXT: stxv 9, 0(3)
; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 1, 0(3)
+; CHECK-NEXT: stxv 8, 0(3)
; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 0, 0(3)
+; CHECK-NEXT: stxv 7, 0(3)
; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 40, 0(3)
+; CHECK-NEXT: stxv 6, 0(3)
; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload
; CHECK-NEXT: stxv 41, 0(3)
; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
index 799ba63..8fb4c21 100644
--- a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
+++ b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll
@@ -40,9 +40,10 @@ define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: # %if.end9
; CHECK-NEXT: #
-; CHECK-NEXT: lwzx 10, 6, 9
+; CHECK-NEXT: add 9, 3, 9
+; CHECK-NEXT: lwz 10, 4(9)
; CHECK-NEXT: addi 10, 10, 1
-; CHECK-NEXT: stwx 10, 6, 9
+; CHECK-NEXT: stw 10, 4(9)
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_5: # %if.then
; CHECK-NEXT: lwax 3, 9, 3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll
new file mode 100644
index 0000000..dd63fa0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+zve64x,+zvl128b < %s | FileCheck %s
+
+define {<8 x i8>, <8 x i8>} @load_factor2(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg2e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8> } @llvm.riscv.sseg2.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret {<8 x i8>, <8 x i8>} %1
+}
+
+define {<8 x i8>, <8 x i8>, <8 x i8>} @load_factor3(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg3e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg3.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret { <8 x i8>, <8 x i8>, <8 x i8> } %1
+}
+
+define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor4(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg4e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg4.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1
+}
+
+define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor5(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg5e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg5.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1
+}
+
+define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor6(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg6e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg6.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1
+}
+
+define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor7(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg7e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg7.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1
+}
+
+define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor8(ptr %ptr, i64 %stride) {
+; CHECK-LABEL: load_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vlsseg8e8.v v8, (a0), a1
+; CHECK-NEXT: ret
+ %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg8.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8)
+ ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
new file mode 100644
index 0000000..ac3cd84
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr='+v' < %s | FileCheck %s
+
+define <2 x i8> @fp4(<4 x i4> %0) nounwind {
+; CHECK-LABEL: fp4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vslidedown.vi v9, v8, 1
+; CHECK-NEXT: vmv.x.s a1, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: andi a0, a0, 15
+; CHECK-NEXT: vmv.x.s a2, v9
+; CHECK-NEXT: andi a1, a1, 15
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: andi a2, a2, 15
+; CHECK-NEXT: slli a1, a1, 12
+; CHECK-NEXT: slli a2, a2, 8
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: or a0, a0, a1
+; CHECK-NEXT: sh a0, 14(sp)
+; CHECK-NEXT: addi a0, sp, 14
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %2 = bitcast <4 x i4> %0 to <2 x i8>
+ ret <2 x i8> %2
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 3dc83d5..38d38f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -1636,3 +1636,49 @@ define <8 x half> @vector_interleave8_v8f16_v1f16(<1 x half> %a, <1 x half> %b,
%res = call <8 x half> @llvm.vector.interleave8.v8f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g, <1 x half> %h)
ret <8 x half> %res
}
+
+define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) {
+; CHECK-LABEL: interleave4_const_splat_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave4_const_splat_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 3
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave4_const_splat_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vmv.v.i v8, 3
+; ZIP-NEXT: ret
+ %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
+ ret <8 x i16> %retval
+}
+
+define <8 x i16> @interleave4_same_nonconst_splat_v8i16(i16 %a) {
+; CHECK-LABEL: interleave4_same_nonconst_splat_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave4_same_nonconst_splat_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vmv.v.x v8, a0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave4_same_nonconst_splat_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vmv.v.x v8, a0
+; ZIP-NEXT: ret
+ %ins = insertelement <2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer
+ %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat)
+ ret <8 x i16> %retval
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 01cc5c5..ee38257 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -14947,3 +14947,147 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv2f64(<vscale x 2 x
%res = call <vscale x 16 x double> @llvm.vector.interleave8.nxv16f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4, <vscale x 2 x double> %v5, <vscale x 2 x double> %v6, <vscale x 2 x double> %v7)
ret <vscale x 16 x double> %res
}
+
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+; CHECK-LABEL: interleave2_same_const_splat_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_same_const_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 3
+; ZVBB-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
+; V-LABEL: interleave2_diff_const_splat_nxv4i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; V-NEXT: vmv.v.i v9, 3
+; V-NEXT: li a0, 4
+; V-NEXT: vmv.v.i v10, -1
+; V-NEXT: vwaddu.vx v8, v9, a0
+; V-NEXT: vwmaccu.vx v8, a0, v10
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v9, v8, a0
+; V-NEXT: vslideup.vx v8, v9, a0
+; V-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_diff_const_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 4
+; ZVBB-NEXT: li a0, 3
+; ZVBB-NEXT: vwsll.vi v9, v8, 16
+; ZVBB-NEXT: vwaddu.wx v8, v9, a0
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v9, v8, a0
+; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave2_diff_const_splat_nxv4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZIP-NEXT: vmv.v.i v9, 4
+; ZIP-NEXT: vmv.v.i v10, 3
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: ri.vzip2b.vv v11, v10, v9
+; ZIP-NEXT: ri.vzip2a.vv v8, v10, v9
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v8, v11, a0
+; ZIP-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.v4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4))
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
+; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vmv.v.x v8, a0
+; ZVBB-NEXT: ret
+ %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) {
+; V-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; V-NEXT: vmv.v.x v9, a0
+; V-NEXT: vmv.v.i v10, -1
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: vwaddu.vx v8, v9, a1
+; V-NEXT: vwmaccu.vx v8, a1, v10
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v9, v8, a0
+; V-NEXT: vslideup.vx v8, v9, a0
+; V-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.x v8, a1
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: vwsll.vi v9, v8, 16
+; ZVBB-NEXT: vwaddu.wx v8, v9, a0
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v9, v8, a1
+; ZVBB-NEXT: vslideup.vx v8, v9, a1
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZIP-NEXT: vmv.v.x v9, a0
+; ZIP-NEXT: vmv.v.x v10, a1
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: ri.vzip2b.vv v11, v9, v10
+; ZIP-NEXT: ri.vzip2a.vv v8, v9, v10
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v8, v11, a0
+; ZIP-NEXT: ret
+ %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2)
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
+; CHECK-LABEL: interleave4_same_const_splat_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave4_same_const_splat_nxv8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 3
+; ZVBB-NEXT: ret
+ %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 8 x i16> %retval
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll b/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll
index f29c74a..697c582 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll
@@ -21,7 +21,7 @@ define <vscale x 4 x i32> @intrinsic_vsha2cl_vv_nxv4i32_nxv4i32(<vscale x 4 x i3
; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; CHECK-NEXT: vsha2ch.vv v8, v10, v12
+; CHECK-NEXT: vsha2cl.vv v8, v10, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vsha2cl.nxv4i32.nxv4i32(
@@ -45,7 +45,7 @@ define <vscale x 8 x i32> @intrinsic_vsha2cl_vv_nxv8i32_nxv8i32(<vscale x 8 x i3
; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
-; CHECK-NEXT: vsha2ch.vv v8, v12, v16
+; CHECK-NEXT: vsha2cl.vv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vsha2cl.nxv8i32.nxv8i32(
@@ -70,7 +70,7 @@ define <vscale x 16 x i32> @intrinsic_vsha2cl_vv_nxv16i32_nxv16i32(<vscale x 16
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
-; CHECK-NEXT: vsha2ch.vv v8, v16, v24
+; CHECK-NEXT: vsha2cl.vv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vsha2cl.nxv16i32.nxv16i32(
@@ -94,7 +94,7 @@ define <vscale x 4 x i64> @intrinsic_vsha2cl_vv_nxv4i64_nxv4i64(<vscale x 4 x i6
; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
-; CHECK-NEXT: vsha2ch.vv v8, v12, v16
+; CHECK-NEXT: vsha2cl.vv v8, v12, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vsha2cl.nxv4i64.nxv4i64(
@@ -119,7 +119,7 @@ define <vscale x 8 x i64> @intrinsic_vsha2cl_vv_nxv8i64_nxv8i64(<vscale x 8 x i6
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
-; CHECK-NEXT: vsha2ch.vv v8, v16, v24
+; CHECK-NEXT: vsha2cl.vv v8, v16, v24
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vsha2cl.nxv8i64.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 09b065a..048ce96 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -117,3 +117,22 @@ entyr:
store i64 0, ptr @g
ret void
}
+
+define void @large_offset(ptr nocapture %p, i64 %d) nounwind {
+; CHECK-LABEL: large_offset:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 4
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: ld a2, -384(a0)
+; CHECK-NEXT: addi a2, a2, 1
+; CHECK-NEXT: seqz a1, a2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: sd a2, -384(a0)
+; CHECK-NEXT: ret
+entry:
+ %add.ptr = getelementptr inbounds i64, ptr %p, i64 2000
+ %a = load i64, ptr %add.ptr, align 8
+ %b = add i64 %a, 1
+ store i64 %b, ptr %add.ptr, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
new file mode 100644
index 0000000..fa708ab
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: result and argument must have the same number of components
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func void @test(<8 x float> %in) {
+ %res = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ ret void
+}
+
+declare spir_func float @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
new file mode 100644
index 0000000..630b2fd
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: result and argument must have the same number of components
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func void @test(<8 x float> %in) {
+ %res = tail call spir_func <4 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ ret void
+}
+
+declare spir_func <4 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
new file mode 100644
index 0000000..dcad78d
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -0,0 +1,62 @@
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - -filetype=obj | spirv-val %}
+
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: the builtin requires the following SPIR-V extension: SPV_INTEL_tensor_float32_conversion
+
+; CHECK: OpCapability TensorFloat32RoundingINTEL
+; CHECK: OpExtension "SPV_INTEL_tensor_float32_conversion"
+
+; CHECK-DAG: %[[VoidTy:.*]] = OpTypeVoid
+; CHECK-DAG: %[[FP32Ty:.*]] = OpTypeFloat 32
+; CHECK-DAG: %[[VecFloat2:.*]] = OpTypeVector %[[FP32Ty]] 2
+; CHECK-DAG: %[[VecFloat3:.*]] = OpTypeVector %[[FP32Ty]] 3
+; CHECK-DAG: %[[VecFloat4:.*]] = OpTypeVector %[[FP32Ty]] 4
+; CHECK-DAG: %[[VecFloat8:.*]] = OpTypeVector %[[FP32Ty]] 8
+; CHECK-DAG: %[[VecFloat16:.*]] = OpTypeVector %[[FP32Ty]] 16
+; CHECK-DAG: %[[FloatConstId:.*]] = OpConstant %[[FP32Ty]] 1.5
+
+; CHECK: OpFunction %[[VoidTy]]
+; CHECK: %[[FP32ValId:.*]] = OpFunctionParameter %[[FP32Ty]]
+; CHECK: %[[FP32v8ValId:.*]] = OpFunctionParameter %[[VecFloat8]]
+; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] %[[FP32ValId]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat8]] %[[FP32v8ValId]]
+; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] %[[FloatConstId]]
+
+; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat2]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat3]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat4]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat8]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat16]]
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func void @test(float %a, <8 x float> %in) {
+ %res1 = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELf(float %a)
+ %res2 = tail call spir_func <8 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ %res3 = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELf(float 1.500000e+00)
+ ret void
+}
+
+declare spir_func float @_Z25__spirv_RoundFToTF32INTELf(float)
+declare spir_func <8 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
+
+define dso_local spir_kernel void @test_ocl(float %a) {
+entry:
+ %res4 = call spir_func float @_Z35intel_round_as_tensor_float32_floatt(float 0.000000e+00)
+ %res5 = call spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2 x float> zeroinitializer)
+ %res6 = call spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float> zeroinitializer)
+ %res7 = call spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float> zeroinitializer)
+ %res8 = call spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float> zeroinitializer)
+ %res9 = call spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float> zeroinitializer)
+ ret void
+}
+
+declare spir_func float @_Z35intel_round_as_tensor_float32_floatt(float)
+declare spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2 x float>)
+declare spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float>)
+declare spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float>)
+declare spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float>)
+declare spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll
index 085f8b3..9d07b63 100644
--- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll
+++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll
@@ -33,7 +33,7 @@ define spir_func void @foo(ptr noundef byval(%tprange) align 8 %_arg_UserRange)
%RoundedRangeKernel = alloca %tprange, align 8
call void @llvm.lifetime.start.p0(i64 72, ptr nonnull %RoundedRangeKernel)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %RoundedRangeKernel, ptr align 8 %_arg_UserRange, i64 16, i1 false)
- %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 16
+ %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 8
call void @llvm.lifetime.end.p0(i64 72, ptr nonnull %RoundedRangeKernel)
ret void
}
@@ -55,7 +55,7 @@ define spir_func void @bar(ptr noundef byval(%tprange) align 8 %_arg_UserRange)
%RoundedRangeKernel = alloca %tprange, align 8
call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %RoundedRangeKernel)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %RoundedRangeKernel, ptr align 8 %_arg_UserRange, i64 16, i1 false)
- %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 16
+ %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 8
call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %RoundedRangeKernel)
ret void
}
diff --git a/llvm/test/CodeGen/SPIRV/logical-struct-access.ll b/llvm/test/CodeGen/SPIRV/logical-struct-access.ll
index a1ff1e0..66337b1 100644
--- a/llvm/test/CodeGen/SPIRV/logical-struct-access.ll
+++ b/llvm/test/CodeGen/SPIRV/logical-struct-access.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
; CHECK-DAG: [[uint:%[0-9]+]] = OpTypeInt 32 0
diff --git a/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll
new file mode 100644
index 0000000..26dc60e
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll
@@ -0,0 +1,46 @@
+; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
+
+%struct.S1 = type { <4 x i32>, [10 x <4 x float>], <4 x float> }
+%struct.S2 = type { <4 x float>, <4 x i32> }
+
+@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1
+
+define <4 x float> @main() {
+entry:
+ %0 = tail call target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32 0, i32 1, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %3 = tail call noundef align 1 dereferenceable(192) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) %0, i32 0)
+
+; CHECK-DAG: %[[#ulong:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#ulong_1:]] = OpConstant %[[#ulong]] 1
+; CHECK-DAG: %[[#ulong_3:]] = OpConstant %[[#ulong]] 3
+
+; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#uint_0:]] = OpConstant %[[#uint]] 0
+; CHECK-DAG: %[[#uint_10:]] = OpConstant %[[#uint]] 10
+
+; CHECK-DAG: %[[#float:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#v4f:]] = OpTypeVector %[[#float]] 4
+; CHECK-DAG: %[[#arr_v4f:]] = OpTypeArray %[[#v4f]] %[[#uint_10]]
+; CHECK-DAG: %[[#S1:]] = OpTypeStruct %[[#]] %[[#arr_v4f]] %[[#]]
+; CHECK-DAG: %[[#sb_S1:]] = OpTypePointer StorageBuffer %[[#S1]]
+; CHECK-DAG: %[[#sb_v4f:]] = OpTypePointer StorageBuffer %[[#v4f]]
+
+; CHECK: %[[#tmp:]] = OpAccessChain %[[#sb_S1]] %[[#]] %[[#uint_0]] %[[#uint_0]]
+; CHECK: %[[#ptr:]] = OpInBoundsAccessChain %[[#sb_v4f]] %[[#tmp]] %[[#ulong_1]] %[[#ulong_3]]
+; The GEP below folds all constant indices into a single byte offset (64).
+; Make sure the correct indices (field 1, element 3) are recovered from it.
+ %arrayidx.i = getelementptr inbounds nuw i8, ptr addrspace(11) %3, i64 64
+
+; CHECK: OpLoad %[[#v4f]] %[[#ptr]]
+ %4 = load <4 x float>, ptr addrspace(11) %arrayidx.i, align 1
+
+ ret <4 x float> %4
+}
+
+declare i32 @llvm.spv.flattened.thread.id.in.group()
+declare target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32, i32, i32, i32, i1, ptr)
+declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0), i32)
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
diff --git a/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll
new file mode 100644
index 0000000..a6efb38
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll
@@ -0,0 +1,54 @@
+; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
+
+%struct.S1 = type { <4 x i32>, [10 x <4 x float>], <4 x float> }
+%struct.S2 = type { <4 x float>, <4 x i32> }
+
+@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1
+
+define <4 x float> @main(i32 %index) {
+entry:
+ %0 = tail call target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32 0, i32 1, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %3 = tail call noundef align 1 dereferenceable(192) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) %0, i32 0)
+
+; CHECK-DAG: %[[#ulong:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#ulong_1:]] = OpConstant %[[#ulong]] 1
+
+; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#uint_0:]] = OpConstant %[[#uint]] 0
+; CHECK-DAG: %[[#uint_10:]] = OpConstant %[[#uint]] 10
+; CHECK-DAG: %[[#uint_16:]] = OpConstant %[[#uint]] 16
+
+; CHECK-DAG: %[[#float:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#v4f:]] = OpTypeVector %[[#float]] 4
+; CHECK-DAG: %[[#arr_v4f:]] = OpTypeArray %[[#v4f]] %[[#uint_10]]
+; CHECK-DAG: %[[#S1:]] = OpTypeStruct %[[#]] %[[#arr_v4f]] %[[#]]
+; CHECK-DAG: %[[#sb_S1:]] = OpTypePointer StorageBuffer %[[#S1]]
+; CHECK-DAG: %[[#sb_arr_v4f:]] = OpTypePointer StorageBuffer %[[#arr_v4f]]
+; CHECK-DAG: %[[#sb_v4f:]] = OpTypePointer StorageBuffer %[[#v4f]]
+
+; CHECK: %[[#a:]] = OpAccessChain %[[#sb_S1]] %[[#]] %[[#uint_0]] %[[#uint_0]]
+; CHECK: %[[#b:]] = OpInBoundsAccessChain %[[#sb_arr_v4f]] %[[#a]] %[[#ulong_1]]
+ %4 = getelementptr inbounds nuw i8, ptr addrspace(11) %3, i64 16
+
+; CHECK: %[[#offset:]] = OpIMul %[[#]] %[[#]] %[[#uint_16]]
+; The offset is computed in bytes. Make sure it is converted back to an index.
+ %offset = mul i32 %index, 16
+
+; CHECK: %[[#index:]] = OpUDiv %[[#]] %[[#offset]] %[[#uint_16]]
+; CHECK: %[[#c:]] = OpInBoundsAccessChain %[[#sb_v4f]] %[[#b]] %[[#index]]
+ %5 = getelementptr inbounds nuw i8, ptr addrspace(11) %4, i32 %offset
+
+; CHECK: OpLoad %[[#v4f]] %[[#c]]
+ %6 = load <4 x float>, ptr addrspace(11) %5, align 1
+
+ ret <4 x float> %6
+}
+
+declare i32 @llvm.spv.flattened.thread.id.in.group()
+declare target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32, i32, i32, i32, i1, ptr)
+declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0), i32)
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
+
diff --git a/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll
new file mode 100644
index 0000000..8e6b5a6
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %}
+
+; struct S1 {
+; int4 i;
+; float4 f;
+; };
+; struct S2 {
+; float4 f;
+; int4 i;
+; };
+;
+; StructuredBuffer<S1> In : register(t1);
+; RWStructuredBuffer<S2> Out : register(u0);
+;
+; [numthreads(1,1,1)]
+; void main(uint GI : SV_GroupIndex) {
+; Out[GI].f = In[GI].f;
+; Out[GI].i = In[GI].i;
+; }
+
+%struct.S1 = type { <4 x i32>, <4 x float> }
+%struct.S2 = type { <4 x float>, <4 x i32> }
+
+@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1
+@.str.2 = private unnamed_addr constant [4 x i8] c"Out\00", align 1
+
+define void @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main
+; CHECK: %43 = OpFunction %2 None %3 ; -- Begin function main
+; CHECK-NEXT: %1 = OpLabel
+; CHECK-NEXT: %44 = OpVariable %28 Function %38
+; CHECK-NEXT: %45 = OpVariable %27 Function %39
+; CHECK-NEXT: %46 = OpCopyObject %19 %40
+; CHECK-NEXT: %47 = OpCopyObject %16 %41
+; CHECK-NEXT: %48 = OpLoad %4 %42
+; CHECK-NEXT: %49 = OpAccessChain %13 %46 %29 %48
+; CHECK-NEXT: %50 = OpInBoundsAccessChain %9 %49 %31
+; CHECK-NEXT: %51 = OpLoad %8 %50 Aligned 1
+; CHECK-NEXT: %52 = OpAccessChain %11 %47 %29 %48
+; CHECK-NEXT: %53 = OpInBoundsAccessChain %9 %52 %29
+; CHECK-NEXT: OpStore %53 %51 Aligned 1
+; CHECK-NEXT: %54 = OpAccessChain %6 %49 %29
+; CHECK-NEXT: %55 = OpLoad %5 %54 Aligned 1
+; CHECK-NEXT: %56 = OpInBoundsAccessChain %6 %52 %31
+; CHECK-NEXT: OpStore %56 %55 Aligned 1
+; CHECK-NEXT: OpReturn
+; CHECK-NEXT: OpFunctionEnd
+entry:
+ %0 = tail call target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32 0, i32 1, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ %1 = tail call target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str.2)
+ %2 = tail call i32 @llvm.spv.flattened.thread.id.in.group()
+ %3 = tail call noundef align 1 dereferenceable(32) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) %0, i32 %2)
+ %f.i = getelementptr inbounds nuw i8, ptr addrspace(11) %3, i64 16
+ %4 = load <4 x float>, ptr addrspace(11) %f.i, align 1
+ %5 = tail call noundef align 1 dereferenceable(32) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1) %1, i32 %2)
+ store <4 x float> %4, ptr addrspace(11) %5, align 1
+ %6 = load <4 x i32>, ptr addrspace(11) %3, align 1
+ %i6.i = getelementptr inbounds nuw i8, ptr addrspace(11) %5, i64 16
+ store <4 x i32> %6, ptr addrspace(11) %i6.i, align 1
+ ret void
+}
+
+declare i32 @llvm.spv.flattened.thread.id.in.group()
+
+declare target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32, i32, i32, i32, i1, ptr)
+
+declare target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(i32, i32, i32, i32, i1, ptr)
+
+declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1), i32)
+
+declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0), i32)
+
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
diff --git a/llvm/test/CodeGen/X86/avx512f-large-stack.ll b/llvm/test/CodeGen/X86/avx512f-large-stack.ll
new file mode 100644
index 0000000..326f72b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512f-large-stack.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 4
+; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f < %s | FileCheck %s --check-prefix=CHECK
+define void @f(i16 %LGV2, i1 %LGV3) {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0: # %BB
+; CHECK-NEXT: subq $2147483528, %rsp # imm = 0x7FFFFF88
+; CHECK-NEXT: .cfi_def_cfa_offset 2147483536
+; CHECK-NEXT: movb %sil, %cl
+; CHECK-NEXT: movw %di, %ax
+; CHECK-NEXT: movswq %ax, %rax
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movabsq $-2147483768, %rdx # imm = 0xFFFFFFFF7FFFFF88
+; CHECK-NEXT: movb %cl, (%rsp,%rdx)
+; CHECK-NEXT: addq $2147483528, %rsp # imm = 0x7FFFFF88
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+BB:
+ %A = alloca i1, i33 2147483648, align 1
+ %G = getelementptr i1, ptr %A, i16 %LGV2
+ %G4 = getelementptr i1, ptr %G, i32 -2147483648
+ store i1 %LGV3, ptr %G4, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index f3e1417..ed3f0e0 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -956,3 +956,185 @@ define i1 @fold_test_and_with_chain(ptr %x, ptr %y, i32 %z) {
store i32 %z, ptr %y
ret i1 %c
}
+
+define i1 @sext_mask(i32 %a) {
+; CHECK-LABEL: sext_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl $-523, %edi # encoding: [0x81,0xff,0xf5,0xfd,0xff,0xff]
+; CHECK-NEXT: # imm = 0xFDF5
+; CHECK-NEXT: setl %al # encoding: [0x0f,0x9c,0xc0]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i32 %a to i64
+ %v1 = icmp slt i64 %a64, -523
+ ret i1 %v1
+}
+
+define i1 @sext_i9_mask(i9 %a) {
+; NO-NDD-LABEL: sext_i9_mask:
+; NO-NDD: # %bb.0:
+; NO-NDD-NEXT: # kill: def $edi killed $edi def $rdi
+; NO-NDD-NEXT: shlq $55, %rdi # encoding: [0x48,0xc1,0xe7,0x37]
+; NO-NDD-NEXT: sarq $55, %rdi # encoding: [0x48,0xc1,0xff,0x37]
+; NO-NDD-NEXT: cmpl $-522, %edi # encoding: [0x81,0xff,0xf6,0xfd,0xff,0xff]
+; NO-NDD-NEXT: # imm = 0xFDF6
+; NO-NDD-NEXT: setl %al # encoding: [0x0f,0x9c,0xc0]
+; NO-NDD-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: sext_i9_mask:
+; NDD: # %bb.0:
+; NDD-NEXT: # kill: def $edi killed $edi def $rdi
+; NDD-NEXT: shlq $55, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x37]
+; NDD-NEXT: sarq $55, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xff,0x37]
+; NDD-NEXT: cmpl $-522, %edi # encoding: [0x81,0xff,0xf6,0xfd,0xff,0xff]
+; NDD-NEXT: # imm = 0xFDF6
+; NDD-NEXT: setl %al # encoding: [0x0f,0x9c,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i9 %a to i64
+ %v1 = icmp slt i64 %a64, -522
+ ret i1 %v1
+}
+
+define i1 @sext_i32_mask(i32 %a) {
+; CHECK-LABEL: sext_i32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl $-522, %edi # encoding: [0x81,0xff,0xf6,0xfd,0xff,0xff]
+; CHECK-NEXT: # imm = 0xFDF6
+; CHECK-NEXT: setl %al # encoding: [0x0f,0x9c,0xc0]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i32 %a to i64
+ %v1 = icmp slt i64 %a64, -522
+ ret i1 %v1
+}
+
+define i1 @i40(i40 %a) {
+; NO-NDD-LABEL: i40:
+; NO-NDD: # %bb.0:
+; NO-NDD-NEXT: shlq $24, %rdi # encoding: [0x48,0xc1,0xe7,0x18]
+; NO-NDD-NEXT: sarq $24, %rdi # encoding: [0x48,0xc1,0xff,0x18]
+; NO-NDD-NEXT: cmpq $-521, %rdi # encoding: [0x48,0x81,0xff,0xf7,0xfd,0xff,0xff]
+; NO-NDD-NEXT: # imm = 0xFDF7
+; NO-NDD-NEXT: setl %al # encoding: [0x0f,0x9c,0xc0]
+; NO-NDD-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: i40:
+; NDD: # %bb.0:
+; NDD-NEXT: shlq $24, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x18]
+; NDD-NEXT: sarq $24, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xff,0x18]
+; NDD-NEXT: cmpq $-521, %rdi # encoding: [0x48,0x81,0xff,0xf7,0xfd,0xff,0xff]
+; NDD-NEXT: # imm = 0xFDF7
+; NDD-NEXT: setl %al # encoding: [0x0f,0x9c,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i40 %a to i64
+ %v1 = icmp slt i64 %a64, -521
+ ret i1 %v1
+}
+
+define i1 @sext_i9_mask_sgt(i9 %a) {
+; NO-NDD-LABEL: sext_i9_mask_sgt:
+; NO-NDD: # %bb.0:
+; NO-NDD-NEXT: # kill: def $edi killed $edi def $rdi
+; NO-NDD-NEXT: shlq $55, %rdi # encoding: [0x48,0xc1,0xe7,0x37]
+; NO-NDD-NEXT: sarq $55, %rdi # encoding: [0x48,0xc1,0xff,0x37]
+; NO-NDD-NEXT: cmpl $-520, %edi # encoding: [0x81,0xff,0xf8,0xfd,0xff,0xff]
+; NO-NDD-NEXT: # imm = 0xFDF8
+; NO-NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; NO-NDD-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: sext_i9_mask_sgt:
+; NDD: # %bb.0:
+; NDD-NEXT: # kill: def $edi killed $edi def $rdi
+; NDD-NEXT: shlq $55, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x37]
+; NDD-NEXT: sarq $55, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xff,0x37]
+; NDD-NEXT: cmpl $-520, %edi # encoding: [0x81,0xff,0xf8,0xfd,0xff,0xff]
+; NDD-NEXT: # imm = 0xFDF8
+; NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i9 %a to i64
+ %v1 = icmp sgt i64 %a64, -521
+ ret i1 %v1
+}
+
+define i1 @sext_i32_mask_sgt(i32 %a) {
+; CHECK-LABEL: sext_i32_mask_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl $-521, %edi # encoding: [0x81,0xff,0xf7,0xfd,0xff,0xff]
+; CHECK-NEXT: # imm = 0xFDF7
+; CHECK-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i32 %a to i64
+ %v1 = icmp sgt i64 %a64, -522
+ ret i1 %v1
+}
+
+define i1 @i40_sge(i40 %a) {
+; NO-NDD-LABEL: i40_sge:
+; NO-NDD: # %bb.0:
+; NO-NDD-NEXT: shlq $24, %rdi # encoding: [0x48,0xc1,0xe7,0x18]
+; NO-NDD-NEXT: sarq $24, %rdi # encoding: [0x48,0xc1,0xff,0x18]
+; NO-NDD-NEXT: cmpq $-521, %rdi # encoding: [0x48,0x81,0xff,0xf7,0xfd,0xff,0xff]
+; NO-NDD-NEXT: # imm = 0xFDF7
+; NO-NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; NO-NDD-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: i40_sge:
+; NDD: # %bb.0:
+; NDD-NEXT: shlq $24, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x18]
+; NDD-NEXT: sarq $24, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xff,0x18]
+; NDD-NEXT: cmpq $-521, %rdi # encoding: [0x48,0x81,0xff,0xf7,0xfd,0xff,0xff]
+; NDD-NEXT: # imm = 0xFDF7
+; NDD-NEXT: setge %al # encoding: [0x0f,0x9d,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i40 %a to i64
+ %v1 = icmp sge i64 %a64, -521
+ ret i1 %v1
+}
+
+define i1 @i40_eq(i40 %a) {
+; NO-NDD-LABEL: i40_eq:
+; NO-NDD: # %bb.0:
+; NO-NDD-NEXT: movabsq $1099511627775, %rax # encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00]
+; NO-NDD-NEXT: # imm = 0xFFFFFFFFFF
+; NO-NDD-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; NO-NDD-NEXT: movabsq $1099511627255, %rcx # encoding: [0x48,0xb9,0xf7,0xfd,0xff,0xff,0xff,0x00,0x00,0x00]
+; NO-NDD-NEXT: # imm = 0xFFFFFFFDF7
+; NO-NDD-NEXT: cmpq %rcx, %rax # encoding: [0x48,0x39,0xc8]
+; NO-NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NO-NDD-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: i40_eq:
+; NDD: # %bb.0:
+; NDD-NEXT: movabsq $1099511627775, %rax # encoding: [0x48,0xb8,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00]
+; NDD-NEXT: # imm = 0xFFFFFFFFFF
+; NDD-NEXT: andq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xf8]
+; NDD-NEXT: movabsq $1099511627255, %rcx # encoding: [0x48,0xb9,0xf7,0xfd,0xff,0xff,0xff,0x00,0x00,0x00]
+; NDD-NEXT: # imm = 0xFFFFFFFDF7
+; NDD-NEXT: cmpq %rcx, %rax # encoding: [0x48,0x39,0xc8]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i40 %a to i64
+ %v1 = icmp eq i64 %a64, -521
+ ret i1 %v1
+}
+
+define i1 @i40_ult(i40 %a) {
+; NO-NDD-LABEL: i40_ult:
+; NO-NDD: # %bb.0:
+; NO-NDD-NEXT: shlq $24, %rdi # encoding: [0x48,0xc1,0xe7,0x18]
+; NO-NDD-NEXT: sarq $24, %rdi # encoding: [0x48,0xc1,0xff,0x18]
+; NO-NDD-NEXT: cmpq $-521, %rdi # encoding: [0x48,0x81,0xff,0xf7,0xfd,0xff,0xff]
+; NO-NDD-NEXT: # imm = 0xFDF7
+; NO-NDD-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; NO-NDD-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: i40_ult:
+; NDD: # %bb.0:
+; NDD-NEXT: shlq $24, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x18]
+; NDD-NEXT: sarq $24, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xff,0x18]
+; NDD-NEXT: cmpq $-521, %rdi # encoding: [0x48,0x81,0xff,0xf7,0xfd,0xff,0xff]
+; NDD-NEXT: # imm = 0xFDF7
+; NDD-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+ %a64 = sext i40 %a to i64
+ %v1 = icmp ult i64 %a64, -521
+ ret i1 %v1
+}
diff --git a/llvm/test/CodeGen/X86/exp10-libcall-names.ll b/llvm/test/CodeGen/X86/exp10-libcall-names.ll
index 96e3aae..2688474 100644
--- a/llvm/test/CodeGen/X86/exp10-libcall-names.ll
+++ b/llvm/test/CodeGen/X86/exp10-libcall-names.ll
@@ -13,10 +13,7 @@
; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL-X86
; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL-X64
-; RUN: not llc -mtriple=x86_64-apple-macos10.8 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s
-; Check exp10/exp10f is emitted as __exp10/__exp10f on assorted systems.
-
-; ERR: no libcall available for fexp10
+; Check that exp10/exp10f is emitted as __exp10/__exp10f on assorted Darwin systems.
define float @test_exp10_f32(float %x) nounwind {
; LINUX-LABEL: test_exp10_f32:
@@ -78,43 +75,3 @@ define double @test_exp10_f64(double %x) nounwind {
%ret = call double @llvm.exp10.f64(double %x)
ret double %ret
}
-
-define x86_fp80 @test_exp10_f80(x86_fp80 %x) nounwind {
-; LINUX-LABEL: test_exp10_f80:
-; LINUX: # %bb.0:
-; LINUX-NEXT: subq $24, %rsp
-; LINUX-NEXT: fldt {{[0-9]+}}(%rsp)
-; LINUX-NEXT: fstpt (%rsp)
-; LINUX-NEXT: callq exp10l@PLT
-; LINUX-NEXT: addq $24, %rsp
-; LINUX-NEXT: retq
-;
-; APPLE-LABEL: test_exp10_f80:
-; APPLE: ## %bb.0:
-; APPLE-NEXT: subq $24, %rsp
-; APPLE-NEXT: fldt {{[0-9]+}}(%rsp)
-; APPLE-NEXT: fstpt (%rsp)
-; APPLE-NEXT: callq _exp10l
-; APPLE-NEXT: addq $24, %rsp
-; APPLE-NEXT: retq
-;
-; GISEL-X86-LABEL: test_exp10_f80:
-; GISEL-X86: # %bb.0:
-; GISEL-X86-NEXT: subl $12, %esp
-; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp)
-; GISEL-X86-NEXT: fstpt (%esp)
-; GISEL-X86-NEXT: calll exp10l
-; GISEL-X86-NEXT: addl $12, %esp
-; GISEL-X86-NEXT: retl
-;
-; GISEL-X64-LABEL: test_exp10_f80:
-; GISEL-X64: # %bb.0:
-; GISEL-X64-NEXT: subq $24, %rsp
-; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
-; GISEL-X64-NEXT: fstpt (%rsp)
-; GISEL-X64-NEXT: callq exp10l
-; GISEL-X64-NEXT: addq $24, %rsp
-; GISEL-X64-NEXT: retq
- %ret = call x86_fp80 @llvm.exp10.f80(x86_fp80 %x)
- ret x86_fp80 %ret
-}
diff --git a/llvm/test/CodeGen/X86/exp10l-libcall-names.ll b/llvm/test/CodeGen/X86/exp10l-libcall-names.ll
new file mode 100644
index 0000000..2e7f9e3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/exp10l-libcall-names.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck -check-prefix=LINUX %s
+; RUN: not llc -mtriple=x86_64-apple-macos10.9 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-ios9.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-tvos9.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-watchos9.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-xros9.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-ios8.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-tvos8.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-xros8.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-driverkit < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=x86_64-apple-driverkit24.0 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: llc < %s -mtriple=i686-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL-X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=GISEL-X64
+
+; ERR: no libcall available for fexp10
+
+define x86_fp80 @test_exp10_f80(x86_fp80 %x) nounwind {
+; LINUX-LABEL: test_exp10_f80:
+; LINUX: # %bb.0:
+; LINUX-NEXT: subq $24, %rsp
+; LINUX-NEXT: fldt {{[0-9]+}}(%rsp)
+; LINUX-NEXT: fstpt (%rsp)
+; LINUX-NEXT: callq exp10l@PLT
+; LINUX-NEXT: addq $24, %rsp
+; LINUX-NEXT: retq
+;
+; GISEL-X86-LABEL: test_exp10_f80:
+; GISEL-X86: # %bb.0:
+; GISEL-X86-NEXT: subl $12, %esp
+; GISEL-X86-NEXT: fldt {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: fstpt (%esp)
+; GISEL-X86-NEXT: calll exp10l
+; GISEL-X86-NEXT: addl $12, %esp
+; GISEL-X86-NEXT: retl
+;
+; GISEL-X64-LABEL: test_exp10_f80:
+; GISEL-X64: # %bb.0:
+; GISEL-X64-NEXT: subq $24, %rsp
+; GISEL-X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: fstpt (%rsp)
+; GISEL-X64-NEXT: callq exp10l
+; GISEL-X64-NEXT: addq $24, %rsp
+; GISEL-X64-NEXT: retq
+ %ret = call x86_fp80 @llvm.exp10.f80(x86_fp80 %x)
+ ret x86_fp80 %ret
+}
diff --git a/llvm/test/CodeGen/X86/huge-stack.ll b/llvm/test/CodeGen/X86/huge-stack.ll
index 920033b..41b8a01 100644
--- a/llvm/test/CodeGen/X86/huge-stack.ll
+++ b/llvm/test/CodeGen/X86/huge-stack.ll
@@ -5,20 +5,70 @@
define void @foo() unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
-; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
+; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88
; CHECK-NEXT: subq %rax, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8589934470
-; CHECK-NEXT: movb $42, -129(%rsp)
-; CHECK-NEXT: movb $43, -128(%rsp)
-; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
+; CHECK-NEXT: .cfi_def_cfa_offset 8589934480
+; CHECK-NEXT: movabsq $4294967177, %rax # imm = 0xFFFFFF89
+; CHECK-NEXT: movb $42, (%rsp,%rax)
+; CHECK-NEXT: movb $43, -118(%rsp)
+; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88
; CHECK-NEXT: addq %rax, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
- %1 = alloca %large, align 1
- %2 = alloca %large, align 1
- %3 = getelementptr inbounds %large, ptr %1, i64 0, i64 0
- store i8 42, ptr %3, align 1
- %4 = getelementptr inbounds %large, ptr %2, i64 0, i64 0
- store i8 43, ptr %4, align 1
+ %large1 = alloca %large, align 1
+ %large2 = alloca %large, align 1
+ %ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0
+ store i8 42, ptr %ptrLarge1, align 1
+ %ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0
+ store i8 43, ptr %ptrLarge2, align 1
ret void
}
+
+declare ptr @baz(ptr, ptr, ptr, ptr)
+
+define ptr @scavenge_spill() unnamed_addr #0 {
+; CHECK-LABEL: scavenge_spill:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $25769803816, %rax # imm = 0x600000028
+; CHECK-NEXT: subq %rax, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 25769803824
+; CHECK-NEXT: movabsq $21474836521, %rax # imm = 0x500000029
+; CHECK-NEXT: leaq (%rsp,%rax), %rdi
+; CHECK-NEXT: movabsq $17179869226, %rax # imm = 0x40000002A
+; CHECK-NEXT: leaq (%rsp,%rax), %rsi
+; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movabsq $12884901931, %rax # imm = 0x30000002B
+; CHECK-NEXT: leaq (%rsp,%rax), %rdx
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movabsq $8589934636, %rax # imm = 0x20000002C
+; CHECK-NEXT: leaq (%rsp,%rax), %rcx
+; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: callq baz@PLT
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: leaq 46(%rsp), %rdi
+; CHECK-NEXT: callq baz@PLT
+; CHECK-NEXT: # kill: def $rcx killed $rax
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; CHECK-NEXT: movabsq $25769803816, %rcx # imm = 0x600000028
+; CHECK-NEXT: addq %rcx, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+ %large1 = alloca %large, align 1
+ %ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0
+ %large2 = alloca %large, align 1
+ %ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0
+ %large3 = alloca %large, align 1
+ %ptrLarge3 = getelementptr inbounds %large, ptr %large3, i64 0, i64 0
+ %large4 = alloca %large, align 1
+ %ptrLarge4 = getelementptr inbounds %large, ptr %large4, i64 0, i64 0
+ %large5 = alloca %large, align 1
+ %ptrLarge5 = getelementptr inbounds %large, ptr %large5, i64 0, i64 0
+ %ret1 = call ptr @baz(ptr %ptrLarge1, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4)
+ %large6 = alloca %large, align 1
+ %ptrLarge6 = getelementptr inbounds %large, ptr %large6, i64 0, i64 0
+ %ret2 = call ptr @baz(ptr %ptrLarge6, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4)
+ ret ptr %ret1
+}
diff --git a/llvm/test/CodeGen/X86/large-displacements-fastisel.ll b/llvm/test/CodeGen/X86/large-displacements-fastisel.ll
new file mode 100644
index 0000000..4177466
--- /dev/null
+++ b/llvm/test/CodeGen/X86/large-displacements-fastisel.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 -O=0 | FileCheck %s
+@G = global i8 0
+
+; Regression test for PR113856 - incorrect FastISel assert
+
+define i32 @main() {
+; CHECK-LABEL: main:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-2147483652, %rax # imm = 0xFFFFFFFF7FFFFFFC
+; CHECK-NEXT: movl $0, (%rsp,%rax)
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
+ %1 = alloca i32, align 4
+ %G = getelementptr i8, ptr %1, i32 -2147483648
+ store i32 0, ptr %G, align 4
+ ret i32 0
+}
diff --git a/llvm/test/CodeGen/X86/large-displacements.ll b/llvm/test/CodeGen/X86/large-displacements.ll
new file mode 100644
index 0000000..8935ec0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/large-displacements.ll
@@ -0,0 +1,82 @@
+; RUN: not llc < %s -mtriple=i686 -filetype=null 2>&1 | FileCheck %s -check-prefix=ERR-i686
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s -check-prefix=x86_64
+
+; Regression test for #121932, #113856, #106352, #69365 and #25051, which were caused by
+; an incorrectly written assertion for 64-bit offsets when compiling for 32-bit X86.
+
+define i32 @main() #0 {
+; ERR-i686: error: <unknown>:0:0: 64-bit offset calculated but target is 32-bit
+;
+; x86_64-LABEL: main:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: movl $4294967192, %eax # imm = 0xFFFFFF98
+; x86_64-NEXT: subq %rax, %rsp
+; x86_64-NEXT: .cfi_def_cfa_offset 4294967200
+; x86_64-NEXT: movabsq $3221225318, %rax # imm = 0xBFFFFF66
+; x86_64-NEXT: movb $32, (%rsp,%rax)
+; x86_64-NEXT: movb $33, 2147483494(%rsp)
+; x86_64-NEXT: movb $34, 1073741670(%rsp)
+; x86_64-NEXT: movb $35, -154(%rsp)
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: movl $4294967192, %ecx # imm = 0xFFFFFF98
+; x86_64-NEXT: addq %rcx, %rsp
+; x86_64-NEXT: .cfi_def_cfa_offset 8
+; x86_64-NEXT: retq
+entry:
+ %a = alloca [1073741824 x i8], align 16
+ %b = alloca [1073741824 x i8], align 16
+ %c = alloca [1073741824 x i8], align 16
+ %d = alloca [1073741824 x i8], align 16
+
+ %arrayida = getelementptr inbounds [1073741824 x i8], ptr %a, i64 0, i64 -42
+ %arrayidb = getelementptr inbounds [1073741824 x i8], ptr %b, i64 0, i64 -42
+ %arrayidc = getelementptr inbounds [1073741824 x i8], ptr %c, i64 0, i64 -42
+ %arrayidd = getelementptr inbounds [1073741824 x i8], ptr %d, i64 0, i64 -42
+
+ store i8 32, ptr %arrayida, align 2
+ store i8 33, ptr %arrayidb, align 2
+ store i8 34, ptr %arrayidc, align 2
+ store i8 35, ptr %arrayidd, align 2
+
+ ret i32 0
+}
+
+; Same test as above but for an anonymous function.
+define i32 @0() #0 {
+; ERR-i686: error: <unknown>:0:0: 64-bit offset calculated but target is 32-bit
+;
+; x86_64-LABEL: __unnamed_1:
+; x86_64: # %bb.0: # %entry
+; x86_64-NEXT: movl $4294967192, %eax # imm = 0xFFFFFF98
+; x86_64-NEXT: subq %rax, %rsp
+; x86_64-NEXT: .cfi_def_cfa_offset 4294967200
+; x86_64-NEXT: movabsq $3221225318, %rax # imm = 0xBFFFFF66
+; x86_64-NEXT: movb $32, (%rsp,%rax)
+; x86_64-NEXT: movb $33, 2147483494(%rsp)
+; x86_64-NEXT: movb $34, 1073741670(%rsp)
+; x86_64-NEXT: movb $35, -154(%rsp)
+; x86_64-NEXT: xorl %eax, %eax
+; x86_64-NEXT: movl $4294967192, %ecx # imm = 0xFFFFFF98
+; x86_64-NEXT: addq %rcx, %rsp
+; x86_64-NEXT: .cfi_def_cfa_offset 8
+; x86_64-NEXT: retq
+entry:
+ %a = alloca [1073741824 x i8], align 16
+ %b = alloca [1073741824 x i8], align 16
+ %c = alloca [1073741824 x i8], align 16
+ %d = alloca [1073741824 x i8], align 16
+
+ %arrayida = getelementptr inbounds [1073741824 x i8], ptr %a, i64 0, i64 -42
+ %arrayidb = getelementptr inbounds [1073741824 x i8], ptr %b, i64 0, i64 -42
+ %arrayidc = getelementptr inbounds [1073741824 x i8], ptr %c, i64 0, i64 -42
+ %arrayidd = getelementptr inbounds [1073741824 x i8], ptr %d, i64 0, i64 -42
+
+ store i8 32, ptr %arrayida, align 2
+ store i8 33, ptr %arrayidb, align 2
+ store i8 34, ptr %arrayidc, align 2
+ store i8 35, ptr %arrayidd, align 2
+
+ ret i32 0
+}
+
+attributes #0 = { optnone noinline }
diff --git a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
index b26345e..6920e74 100644
--- a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
+++ b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
@@ -22,8 +22,8 @@ entry:
call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
; CHECK: call{{.*}}bar
; CHECK: addq{{.*}}$2147483647, %rsp
-; CHECK: addq{{.*}}$372037585, %rsp
-; CHECK: .cfi_adjust_cfa_offset -2519521232
+; CHECK: addq{{.*}}$372037601, %rsp
+; CHECK: .cfi_adjust_cfa_offset -2519521248
ret void
}
diff --git a/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll
index d9b20f5..4c8bb62 100644
--- a/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-extra-huge.ll
@@ -16,13 +16,13 @@ define i32 @foo() local_unnamed_addr #0 {
; CHECK-X64-NEXT: cmpq %r11, %rsp
; CHECK-X64-NEXT: jne .LBB0_1
; CHECK-X64-NEXT: # %bb.2:
-; CHECK-X64-NEXT: subq $3976, %rsp # imm = 0xF88
+; CHECK-X64-NEXT: subq $3992, %rsp # imm = 0xF98
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
-; CHECK-X64-NEXT: .cfi_def_cfa_offset 4799999888
-; CHECK-X64-NEXT: movl $1, 264(%rsp)
-; CHECK-X64-NEXT: movl $1, 28664(%rsp)
-; CHECK-X64-NEXT: movl -128(%rsp), %eax
-; CHECK-X64-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88
+; CHECK-X64-NEXT: .cfi_def_cfa_offset 4799999904
+; CHECK-X64-NEXT: movl $1, 280(%rsp)
+; CHECK-X64-NEXT: movl $1, 28680(%rsp)
+; CHECK-X64-NEXT: movl -112(%rsp), %eax
+; CHECK-X64-NEXT: movabsq $4799999896, %rcx # imm = 0x11E1A2F98
; CHECK-X64-NEXT: addq %rcx, %rsp
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
; CHECK-X64-NEXT: retq
@@ -30,10 +30,10 @@ define i32 @foo() local_unnamed_addr #0 {
; CHECK-X86-LABEL: foo:
; CHECK-X86: # %bb.0:
; CHECK-X86-NEXT: ud2
-; CHECK-X86-NEXT: .cfi_def_cfa_offset 4800000016
-; CHECK-X86-NEXT: movl $1, 392(%esp)
-; CHECK-X86-NEXT: movl $1, 28792(%esp)
-; CHECK-X86-NEXT: movl (%esp), %eax
+; CHECK-X86-NEXT: .cfi_def_cfa_offset 4800000032
+; CHECK-X86-NEXT: movl $1, 408(%esp)
+; CHECK-X86-NEXT: movl $1, 28808(%esp)
+; CHECK-X86-NEXT: movl 16(%esp), %eax
; CHECK-X86-NEXT: ud2
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
; CHECK-X86-NEXT: retl
@@ -41,10 +41,10 @@ define i32 @foo() local_unnamed_addr #0 {
; CHECK-X32-LABEL: foo:
; CHECK-X32: # %bb.0:
; CHECK-X32-NEXT: ud2
-; CHECK-X32-NEXT: .cfi_def_cfa_offset 4799999888
-; CHECK-X32-NEXT: movl $1, 264(%esp)
-; CHECK-X32-NEXT: movl $1, 28664(%esp)
-; CHECK-X32-NEXT: movl -128(%esp), %eax
+; CHECK-X32-NEXT: .cfi_def_cfa_offset 4799999904
+; CHECK-X32-NEXT: movl $1, 280(%esp)
+; CHECK-X32-NEXT: movl $1, 28680(%esp)
+; CHECK-X32-NEXT: movl -112(%esp), %eax
; CHECK-X32-NEXT: ud2
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
; CHECK-X32-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/stack-clash-huge.ll b/llvm/test/CodeGen/X86/stack-clash-huge.ll
index c999077..0e8c215 100644
--- a/llvm/test/CodeGen/X86/stack-clash-huge.ll
+++ b/llvm/test/CodeGen/X86/stack-clash-huge.ll
@@ -16,13 +16,13 @@ define i32 @foo() local_unnamed_addr #0 {
; CHECK-X64-NEXT: cmpq %r11, %rsp
; CHECK-X64-NEXT: jne .LBB0_1
; CHECK-X64-NEXT: # %bb.2:
-; CHECK-X64-NEXT: subq $1928, %rsp # imm = 0x788
+; CHECK-X64-NEXT: subq $1944, %rsp # imm = 0x798
; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
-; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999888
-; CHECK-X64-NEXT: movl $1, 264(%rsp)
-; CHECK-X64-NEXT: movl $1, 28664(%rsp)
-; CHECK-X64-NEXT: movl -128(%rsp), %eax
-; CHECK-X64-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
+; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999904
+; CHECK-X64-NEXT: movl $1, 280(%rsp)
+; CHECK-X64-NEXT: movl $1, 28680(%rsp)
+; CHECK-X64-NEXT: movl -112(%rsp), %eax
+; CHECK-X64-NEXT: movl $2399999896, %ecx # imm = 0x8F0D1798
; CHECK-X64-NEXT: addq %rcx, %rsp
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
; CHECK-X64-NEXT: retq
@@ -39,13 +39,13 @@ define i32 @foo() local_unnamed_addr #0 {
; CHECK-X86-NEXT: cmpl %eax, %esp
; CHECK-X86-NEXT: jne .LBB0_1
; CHECK-X86-NEXT: # %bb.2:
-; CHECK-X86-NEXT: subl $2060, %esp # imm = 0x80C
+; CHECK-X86-NEXT: subl $2076, %esp # imm = 0x81C
; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
-; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000016
-; CHECK-X86-NEXT: movl $1, 392(%esp)
-; CHECK-X86-NEXT: movl $1, 28792(%esp)
-; CHECK-X86-NEXT: movl (%esp), %eax
-; CHECK-X86-NEXT: movl $2400000012, %ecx # imm = 0x8F0D180C
+; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000032
+; CHECK-X86-NEXT: movl $1, 408(%esp)
+; CHECK-X86-NEXT: movl $1, 28808(%esp)
+; CHECK-X86-NEXT: movl 16(%esp), %eax
+; CHECK-X86-NEXT: movl $2400000028, %ecx # imm = 0x8F0D181C
; CHECK-X86-NEXT: addl %ecx, %esp
; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
; CHECK-X86-NEXT: retl
@@ -62,13 +62,13 @@ define i32 @foo() local_unnamed_addr #0 {
; CHECK-X32-NEXT: cmpl %r11d, %esp
; CHECK-X32-NEXT: jne .LBB0_1
; CHECK-X32-NEXT: # %bb.2:
-; CHECK-X32-NEXT: subl $1928, %esp # imm = 0x788
+; CHECK-X32-NEXT: subl $1944, %esp # imm = 0x798
; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
-; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999888
-; CHECK-X32-NEXT: movl $1, 264(%esp)
-; CHECK-X32-NEXT: movl $1, 28664(%esp)
-; CHECK-X32-NEXT: movl -128(%esp), %eax
-; CHECK-X32-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
+; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999904
+; CHECK-X32-NEXT: movl $1, 280(%esp)
+; CHECK-X32-NEXT: movl $1, 28680(%esp)
+; CHECK-X32-NEXT: movl -112(%esp), %eax
+; CHECK-X32-NEXT: movl $2399999896, %ecx # imm = 0x8F0D1798
; CHECK-X32-NEXT: addl %ecx, %esp
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
; CHECK-X32-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll
index 9555ce0..732fc65 100644
--- a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll
+++ b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll
@@ -10,5 +10,5 @@ start:
attributes #0 = { nonlazybind uwtable "probe-stack"="probe_stack" "target-cpu"="x86-64" }
; CHECK-LABEL: foo:
-; CHECK: movabsq $4294967304, %rax
+; CHECK: movabsq $4294967312, %rax
; CHECK-NEXT: callq probe_stack
diff --git a/llvm/test/DebugInfo/PDB/obj-globalhash.test b/llvm/test/DebugInfo/PDB/obj-globalhash.test
index 116ea91..bff6826 100644
--- a/llvm/test/DebugInfo/PDB/obj-globalhash.test
+++ b/llvm/test/DebugInfo/PDB/obj-globalhash.test
@@ -1,15 +1,15 @@
-RUN: yaml2obj %p/Inputs/obj-hashes-1.yaml -o %T/obj-hashes-1.obj
-RUN: yaml2obj %p/Inputs/obj-hashes-2.yaml -o %T/obj-hashes-2.obj
-RUN: echo obj-hashes-1 > %T/hashes-combined.out
-RUN: llvm-pdbutil dump -type-extras %T/obj-hashes-1.obj >> %T/hashes-combined.out
-RUN: echo obj-hashes-2 >> %T/hashes-combined.out
-RUN: llvm-pdbutil dump -type-extras %T/obj-hashes-2.obj >> %T/hashes-combined.out
-RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-ONE %s
-RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-TWO %s
-RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-THREE %s
-RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-FOUR %s
-RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-FIVE %s
-RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-SIX %s
+RUN: yaml2obj %p/Inputs/obj-hashes-1.yaml -o %t.obj-hashes-1.obj
+RUN: yaml2obj %p/Inputs/obj-hashes-2.yaml -o %t.obj-hashes-2.obj
+RUN: echo obj-hashes-1 > %t.hashes-combined.out
+RUN: llvm-pdbutil dump -type-extras %t.obj-hashes-1.obj >> %t.hashes-combined.out
+RUN: echo obj-hashes-2 >> %t.hashes-combined.out
+RUN: llvm-pdbutil dump -type-extras %t.obj-hashes-2.obj >> %t.hashes-combined.out
+RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-ONE %s
+RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-TWO %s
+RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-THREE %s
+RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-FOUR %s
+RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-FIVE %s
+RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-SIX %s
; char**. Both the local and global hashes should be the same, since the only
; back-references are for simple types which have fixed indices.
diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
index d609a3f..65542e8 100644
--- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
+++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
@@ -1,21 +1,45 @@
; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o %t.o
-; RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=READOBJ-RELOCS %s
+; RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=RELOC %s
; RUN: llvm-objdump --source %t.o | FileCheck --check-prefix=OBJDUMP-SOURCE %s
-; RUN: llvm-dwarfdump --debug-info %t.o | \
-; RUN: FileCheck -check-prefix=DWARF-DUMP %s
-; RUN: llvm-dwarfdump --debug-line -v %t.o | \
-; RUN: FileCheck -check-prefix=LINE-DUMP %s
+; RUN: llvm-dwarfdump --debug-info -debug-line -v %t.o | \
+; RUN: FileCheck -check-prefix=DWARF %s
-; Check that we actually have relocations, otherwise this is kind of pointless.
-; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_info {
-; READOBJ-RELOCS: 0x1B R_RISCV_ADD32 .L0 0x0
-; READOBJ-RELOCS-NEXT: 0x1B R_RISCV_SUB32 .L0 0x0
-; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_frame {
-; READOBJ-RELOCS: 0x20 R_RISCV_ADD32 .L0 0x0
-; READOBJ-RELOCS-NEXT: 0x20 R_RISCV_SUB32 .L0 0x0
-; READOBJ-RELOCS: Section ({{.*}}) .rela.debug_line {
-; READOBJ-RELOCS: 0x5A R_RISCV_ADD16 .L0 0x0
-; READOBJ-RELOCS-NEXT: 0x5A R_RISCV_SUB16 .L0 0x0
+; RELOC: .rela.debug_info {
+; RELOC-NEXT: 0x8 R_RISCV_32 .debug_abbrev 0x0
+; RELOC-NEXT: 0x11 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x15 R_RISCV_32 .Lline_table_start0 0x0
+; RELOC-NEXT: 0x1B R_RISCV_ADD32 .L0 0x0
+; RELOC-NEXT: 0x1B R_RISCV_SUB32 .L0 0x0
+; RELOC-NEXT: 0x1F R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x25 R_RISCV_ADD32 .L0 0x0
+; RELOC-NEXT: 0x25 R_RISCV_SUB32 .L0 0x0
+; RELOC-NEXT: }
+; RELOC-NEXT: .rela.debug_str_offsets {
+; RELOC-NEXT: 0x8 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0xC R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x10 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x14 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x18 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: }
+; RELOC-NEXT: .rela.debug_addr {
+; RELOC-NEXT: 0x8 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: }
+; RELOC-NEXT: .rela.debug_frame {
+; RELOC-NEXT: 0x18 R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x1C R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x20 R_RISCV_ADD32 .L0 0x0
+; RELOC-NEXT: 0x20 R_RISCV_SUB32 .L0 0x0
+; RELOC-NEXT: 0x33 R_RISCV_SET6 .L0 0x0
+; RELOC-NEXT: 0x33 R_RISCV_SUB6 .L0 0x0
+; RELOC-NEXT: }
+; RELOC-NEXT: .rela.debug_line {
+; RELOC-NEXT: 0x22 R_RISCV_32 .debug_line_str 0x0
+; RELOC-NEXT: 0x31 R_RISCV_32 .debug_line_str 0x2
+; RELOC-NEXT: 0x46 R_RISCV_32 .debug_line_str 0x17
+; RELOC-NEXT: 0x4F R_RISCV_32 .L0 0x0
+; RELOC-NEXT: 0x5B R_RISCV_ADD16 .L0 0x0
+; RELOC-NEXT: 0x5B R_RISCV_SUB16 .L0 0x0
+; RELOC-NEXT: }
; Check that we can print the source, even with relocations.
; OBJDUMP-SOURCE: Disassembly of section .text:
@@ -24,70 +48,61 @@
; OBJDUMP-SOURCE: ; {
; OBJDUMP-SOURCE: ; return 0;
-; Check that we correctly dump the DWARF info, even with relocations.
-; DWARF-DUMP: DW_AT_name ("dwarf-riscv-relocs.c")
-; DWARF-DUMP: DW_AT_comp_dir (".")
-; DWARF-DUMP: DW_AT_name ("main")
-; DWARF-DUMP: DW_AT_decl_file ("{{.*}}dwarf-riscv-relocs.c")
-; DWARF-DUMP: DW_AT_decl_line (1)
-; DWARF-DUMP: DW_AT_type (0x00000032 "int")
-; DWARF-DUMP: DW_AT_name ("int")
-; DWARF-DUMP: DW_AT_encoding (DW_ATE_signed)
-; DWARF-DUMP: DW_AT_byte_size (0x04)
-
-; LINE-DUMP: .debug_line contents:
-; LINE-DUMP-NEXT: debug_line[0x00000000]
-; LINE-DUMP-NEXT: Line table prologue:
-; LINE-DUMP-NEXT: total_length: 0x00000061
-; LINE-DUMP-NEXT: format: DWARF32
-; LINE-DUMP-NEXT: version: 5
-; LINE-DUMP-NEXT: address_size: 4
-; LINE-DUMP-NEXT: seg_select_size: 0
-; LINE-DUMP-NEXT: prologue_length: 0x0000003e
-; LINE-DUMP-NEXT: min_inst_length: 1
-; LINE-DUMP-NEXT: max_ops_per_inst: 1
-; LINE-DUMP-NEXT: default_is_stmt: 1
-; LINE-DUMP-NEXT: line_base: -5
-; LINE-DUMP-NEXT: line_range: 14
-; LINE-DUMP-NEXT: opcode_base: 13
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_copy] = 0
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_advance_pc] = 1
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_advance_line] = 1
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_file] = 1
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_column] = 1
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_negate_stmt] = 0
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_basic_block] = 0
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_const_add_pc] = 0
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
-; LINE-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_isa] = 1
-; LINE-DUMP-NEXT: include_directories[ 0] = .debug_line_str[0x00000000] = "."
-; LINE-DUMP-NEXT: file_names[ 0]:
-; LINE-DUMP-NEXT: name: .debug_line_str[0x00000002] = "dwarf-riscv-relocs.c"
-; LINE-DUMP-NEXT: dir_index: 0
-; LINE-DUMP-NEXT: md5_checksum: 05ab89f5481bc9f2d037e7886641e919
-; LINE-DUMP-NEXT: source: .debug_line_str[0x00000017] = "int main()\n{\n return 0;\n}\n"
-; LINE-DUMP-EMPTY:
-; LINE-DUMP-NEXT: Address Line Column File ISA Discriminator OpIndex Flags
-; LINE-DUMP-NEXT: ------------------ ------ ------ ------ --- ------------- ------- -------------
-; LINE-DUMP-NEXT:0x0000004a: 04 DW_LNS_set_file (0)
-; LINE-DUMP-NEXT:0x0000004c: 00 DW_LNE_set_address (0x00000000)
-; LINE-DUMP-NEXT:0x00000053: 13 address += 0, line += 1, op-index += 0
-; LINE-DUMP-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt
-; LINE-DUMP-NEXT:0x00000054: 05 DW_LNS_set_column (5)
-; LINE-DUMP-NEXT:0x00000056: 0a DW_LNS_set_prologue_end
-; LINE-DUMP-NEXT:0x00000057: 03 DW_LNS_advance_line (3)
-; LINE-DUMP-NEXT:0x00000059: 09 DW_LNS_fixed_advance_pc (addr += 0x001c, op-index = 0)
-; LINE-DUMP-NEXT:0x0000005c: 01 DW_LNS_copy
-; LINE-DUMP-NEXT: 0x000000000000001c 3 5 0 0 0 0 is_stmt prologue_end
-; LINE-DUMP-NEXT:0x0000005d: 06 DW_LNS_negate_stmt
-; LINE-DUMP-NEXT:0x0000005e: 0b DW_LNS_set_epilogue_begin
-; LINE-DUMP-NEXT:0x0000005f: 4a address += 4, line += 0, op-index += 0
-; LINE-DUMP-NEXT: 0x0000000000000020 3 5 0 0 0 0 epilogue_begin
-; LINE-DUMP-NEXT:0x00000060: 02 DW_LNS_advance_pc (addr += 16, op-index += 0)
-; LINE-DUMP-NEXT:0x00000062: 00 DW_LNE_end_sequence
-; LINE-DUMP-NEXT: 0x0000000000000030 3 5 0 0 0 0 end_sequence
+; DWARF: .debug_line contents:
+; DWARF-NEXT: debug_line[0x00000000]
+; DWARF-NEXT: Line table prologue:
+; DWARF-NEXT: total_length: 0x00000062
+; DWARF-NEXT: format: DWARF32
+; DWARF-NEXT: version: 5
+; DWARF-NEXT: address_size: 4
+; DWARF-NEXT: seg_select_size: 0
+; DWARF-NEXT: prologue_length: 0x0000003e
+; DWARF-NEXT: min_inst_length: 1
+; DWARF-NEXT: max_ops_per_inst: 1
+; DWARF-NEXT: default_is_stmt: 1
+; DWARF-NEXT: line_base: -5
+; DWARF-NEXT: line_range: 14
+; DWARF-NEXT: opcode_base: 13
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_copy] = 0
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_advance_pc] = 1
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_advance_line] = 1
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_file] = 1
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_column] = 1
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_negate_stmt] = 0
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_basic_block] = 0
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_const_add_pc] = 0
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
+; DWARF-NEXT: standard_opcode_lengths[DW_LNS_set_isa] = 1
+; DWARF-NEXT: include_directories[ 0] = .debug_line_str[0x00000000] = "."
+; DWARF-NEXT: file_names[ 0]:
+; DWARF-NEXT: name: .debug_line_str[0x00000002] = "dwarf-riscv-relocs.c"
+; DWARF-NEXT: dir_index: 0
+; DWARF-NEXT: md5_checksum: 05ab89f5481bc9f2d037e7886641e919
+; DWARF-NEXT: source: .debug_line_str[0x00000017] = "int main()\n{\n return 0;\n}\n"
+; DWARF-EMPTY:
+; DWARF-NEXT: Address Line Column File ISA Discriminator OpIndex Flags
+; DWARF-NEXT: ------------------ ------ ------ ------ --- ------------- ------- -------------
+; DWARF-NEXT:0x0000004a: 04 DW_LNS_set_file (0)
+; DWARF-NEXT:0x0000004c: 00 DW_LNE_set_address (0x00000000)
+; DWARF-NEXT:0x00000053: 13 address += 0, line += 1, op-index += 0
+; DWARF-NEXT: 0x0000000000000000 2 0 0 0 0 0 is_stmt
+; DWARF-NEXT:0x00000054: 05 DW_LNS_set_column (5)
+; DWARF-NEXT:0x00000056: 0a DW_LNS_set_prologue_end
+; DWARF-NEXT:0x00000057: f3 address += 16, line += 1, op-index += 0
+; DWARF-NEXT: 0x0000000000000010 3 5 0 0 0 0 is_stmt prologue_end
+; DWARF-NEXT:0x00000058: 03 DW_LNS_advance_line (4)
+; DWARF-NEXT:0x0000005a: 09 DW_LNS_fixed_advance_pc (addr += 0x0010, op-index = 0)
+; DWARF-NEXT:0x0000005d: 01 DW_LNS_copy
+; DWARF-NEXT: 0x0000000000000020 4 5 0 0 0 0 is_stmt
+; DWARF-NEXT:0x0000005e: 06 DW_LNS_negate_stmt
+; DWARF-NEXT:0x0000005f: 0b DW_LNS_set_epilogue_begin
+; DWARF-NEXT:0x00000060: 4a address += 4, line += 0, op-index += 0
+; DWARF-NEXT: 0x0000000000000024 4 5 0 0 0 0 epilogue_begin
+; DWARF-NEXT:0x00000061: 02 DW_LNS_advance_pc (addr += 16, op-index += 0)
+; DWARF-NEXT:0x00000063: 00 DW_LNE_end_sequence
+; DWARF-NEXT: 0x0000000000000034 4 5 0 0 0 0 end_sequence
; ModuleID = 'dwarf-riscv-relocs.c'
source_filename = "dwarf-riscv-relocs.c"
@@ -97,10 +112,8 @@ target triple = "riscv32"
; Function Attrs: noinline nounwind optnone
define dso_local i32 @main() #0 !dbg !7 {
entry:
- call void @ext()
- %retval = alloca i32, align 4
- store i32 0, ptr %retval, align 4
- ret i32 0, !dbg !11
+ call void asm sideeffect ".cfi_remember_state\0A\09.cfi_adjust_cfa_offset 16\0A\09nop\0A\09call ext\0A\09nop\0A\09.cfi_restore_state\0A\09", ""() #1, !dbg !11
+ ret i32 0, !dbg !12
}
declare void @ext()
@@ -123,3 +136,4 @@ attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-ma
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !DILocation(line: 3, column: 5, scope: !7)
+!12 = !DILocation(line: 4, column: 5, scope: !7)
diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test
index b97f8ab..5f930ad 100644
--- a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test
+++ b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test
@@ -1,15 +1,15 @@
-# RUN: opt -module-summary %p/Inputs/main-mod.ll -o %T/main-mod.bc
-# RUN: opt -module-summary %p/Inputs/foo-mod.ll -o %T/foo-mod.bc
-# RUN: opt -module-summary %p/Inputs/bar-mod.ll -o %T/bar-mod.bc
+# RUN: opt -module-summary %p/Inputs/main-mod.ll -o %t.main-mod.bc
+# RUN: opt -module-summary %p/Inputs/foo-mod.ll -o %t.foo-mod.bc
+# RUN: opt -module-summary %p/Inputs/bar-mod.ll -o %t.bar-mod.bc
# REQUIRES: default_triple
# UNSUPPORTED: target=powerpc64{{.*}}
-# RUN: llvm-lto -thinlto -o %T/main-foo-bar %T/main-mod.bc %T/foo-mod.bc %T/bar-mod.bc
+# RUN: llvm-lto -thinlto -o %t.main-foo-bar %t.main-mod.bc %t.foo-mod.bc %t.bar-mod.bc
-# RUN: LLJITWithThinLTOSummaries %T/main-foo-bar.thinlto.bc 2>&1 | FileCheck %s
+# RUN: LLJITWithThinLTOSummaries %t.main-foo-bar.thinlto.bc 2>&1 | FileCheck %s
-# CHECK: About to load module: {{.*}}/main-mod.bc
-# CHECK-DAG: About to load module: {{.*}}/foo-mod.bc
-# CHECK-DAG: About to load module: {{.*}}/bar-mod.bc
+# CHECK: About to load module: {{.*}}main-mod.bc
+# CHECK-DAG: About to load module: {{.*}}foo-mod.bc
+# CHECK-DAG: About to load module: {{.*}}bar-mod.bc
# CHECK: 'main' finished with exit code: 0
diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s
index cf87861..6578539 100644
--- a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s
+++ b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s
@@ -1,8 +1,8 @@
-# RUN: yaml2obj -o %T/libfoo.dylib %S/Inputs/libFooUniversalDylib.yaml
+# RUN: yaml2obj -o %t.libfoo.dylib %S/Inputs/libFooUniversalDylib.yaml
# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj \
-# RUN: -o %T/MachO_foo-in-weak-dylib.o %s
-# RUN: llvm-jitlink -noexec %T/MachO_foo-in-weak-dylib.o \
-# RUN: -weak_library %T/libfoo.dylib
+# RUN: -o %t.MachO_foo-in-weak-dylib.o %s
+# RUN: llvm-jitlink -noexec %t.MachO_foo-in-weak-dylib.o \
+# RUN: -weak_library %t.libfoo.dylib
#
# Check that -weak_library supports universal binaries.
diff --git a/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
index 513e252a..9522bfa 100644
--- a/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
@@ -1,5 +1,7 @@
; RUN: %lli -jit-kind=mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
-; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}}
+; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}}, target={{.*-(cygwin|windows-cygnus)}}
+; This test segfaults on cygwin, but succeeds with cygwin-elf. Unfortunately,
+; cygwin-elf breaks the remote tests due to lack of __register_frame.
define i32 @main() nounwind {
entry:
diff --git a/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s b/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s
index 8259484..7bb22ef 100644
--- a/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s
+++ b/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s
@@ -1,7 +1,8 @@
-# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s
-# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
-# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s
-# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
+# RUN: rm -rf %t && mkdir %t && cd %t
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o large-reloc.o %s
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s large-reloc.o
+# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o large-reloc.o %s
+# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s large-reloc.o
.text
.globl g
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
index 1f40a32..13f1bb0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s
@@ -766,3 +766,402 @@ v_cvt_scale_pk8_f32_fp4 v[10:17], v20, 0xcf00
v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1
// GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 ; encoding: [0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00]
+
+v_permlane_bcast_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_bcast_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_bcast_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_bcast_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_bcast_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_bcast_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_bcast_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_bcast_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_bcast_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_down_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_down_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_down_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_down_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_down_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_down_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_down_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_down_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_down_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_down_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_down_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_down_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_down_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_down_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_down_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_down_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_up_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_up_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_up_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_up_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_up_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_up_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_up_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_up_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_up_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_up_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_up_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_up_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_up_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_up_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_up_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_up_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_xor_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_xor_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_xor_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_xor_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_xor_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_xor_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_xor_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_xor_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_xor_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_xor_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_xor_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_idx_gen_b32 v5, v1, s2
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, s105
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, s105 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, ttmp15
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, vcc_hi
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, vcc_lo
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, m0
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, exec_hi
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, exec_lo
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00]
+
+v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc0,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc0,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc2,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc2,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xbf,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xbf,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc1,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc1,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x98,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x98,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x99,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x99,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x97,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x97,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xb9,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xb9,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xbc,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xbc,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_perm_pk16_b4_u4 v[2:3], v4, v5, v[6:7]
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, v[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0x1a,0x04]
+
+v_perm_pk16_b4_u4 v[2:3], v4, ttmp5, s[6:7]
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, ttmp5, s[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0xe3,0x18,0x00]
+
+v_perm_pk16_b4_u4 v[2:3], s4, v5, v[6:7]
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], s4, v5, v[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0a,0x1a,0x04]
+
+v_perm_pk16_b4_u4 v[2:3], v4, v5, 100
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, 0x64 ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+v_perm_pk16_b4_u4 v[2:3], v4, v5, 4
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, 4 ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0x12,0x02]
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[8:9], v[6:7]
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[8:9], v[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x11,0x1a,0x04]
+
+v_perm_pk16_b6_u4 v[2:4], v4, ttmp[4:5], s[6:7]
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, ttmp[4:5], s[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0xe1,0x18,0x00]
+
+v_perm_pk16_b6_u4 v[2:4], s4, v[4:5], v[6:7]
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], s4, v[4:5], v[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x08,0x1a,0x04]
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 100
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 0x64 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 4
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 4 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x12,0x02]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], v[8:9], v[6:7]
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[8:9], v[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x11,0x1a,0x04]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], ttmp[4:5], s[6:7]
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], ttmp[4:5], s[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0xe1,0x18,0x00]
+
+v_perm_pk16_b8_u4 v[2:5], s[4:5], v[4:5], v[6:7]
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], s[4:5], v[4:5], v[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x08,0x1a,0x04]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 100
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 0x64 ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4 ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x09,0x12,0x02]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1 ; encoding: [0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2 ; encoding: [0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3 ; encoding: [0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4 ; encoding: [0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 ; encoding: [0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4 ; encoding: [0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 ; encoding: [0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5 ; encoding: [0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd2,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd2,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd0,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], 100.0
+// GFX1250: v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], 0x42c80000 ; encoding: [0x0a,0x00,0xce,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], v8
+// GFX1250: v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], v8 ; encoding: [0x0a,0x00,0xce,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd1,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd1,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xcf,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xcf,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], 100.0
+// GFX1250: v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], 0x42c80000 ; encoding: [0x0a,0x00,0xcd,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], v8
+// GFX1250: v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], v8 ; encoding: [0x0a,0x00,0xcd,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd8,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd8,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd6,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd6,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd7,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd7,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd5,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd5,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], v4, v8 ; encoding: [0x0a,0x00,0xd4,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd4,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], v4, v8 ; encoding: [0x0a,0x00,0xd3,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd3,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
index 03f642d..1441f38 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s
@@ -766,3 +766,402 @@ v_cvt_scale_pk8_f32_fp4 v[10:17], v20, 0xcf00
v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1
// GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 ; encoding: [0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00]
+
+v_permlane_bcast_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_bcast_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_bcast_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_bcast_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_bcast_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_bcast_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_bcast_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_bcast_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_bcast_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_down_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_down_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_down_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_down_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_down_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_down_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_down_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_down_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_down_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_down_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_down_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_down_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_down_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_down_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_down_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_down_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_up_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_up_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_up_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_up_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_up_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_up_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_up_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_up_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_up_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_up_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_up_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_up_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_up_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_up_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_up_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_up_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_xor_b32 v5, v1, s2, s3
+// GFX1250: v_permlane_xor_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00]
+
+v_permlane_xor_b32 v5, v1, s105, s105
+// GFX1250: v_permlane_xor_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01]
+
+v_permlane_xor_b32 v5, v1, ttmp15, ttmp15
+// GFX1250: v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01]
+
+v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo
+// GFX1250: v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01]
+
+v_permlane_xor_b32 v5, v1, vcc_lo, m0
+// GFX1250: v_permlane_xor_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01]
+
+v_permlane_xor_b32 v5, v1, m0, vcc_hi
+// GFX1250: v_permlane_xor_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01]
+
+v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo
+// GFX1250: v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01]
+
+v_permlane_xor_b32 v5, v1, exec_lo, src_scc
+// GFX1250: v_permlane_xor_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03]
+
+v_permlane_idx_gen_b32 v5, v1, s2
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, s105
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, s105 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, ttmp15
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, vcc_hi
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, vcc_lo
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, m0
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, exec_hi
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00]
+
+v_permlane_idx_gen_b32 v5, v1, exec_lo
+// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00]
+
+v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8
+// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 100.0
+// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc0,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc0,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc2,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc2,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xbf,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xbf,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc1,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc1,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x98,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x98,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x99,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x99,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x97,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x97,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xb9,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xb9,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xbc,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xbc,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_perm_pk16_b4_u4 v[2:3], v4, v5, v[6:7]
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, v[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0x1a,0x04]
+
+v_perm_pk16_b4_u4 v[2:3], v4, ttmp5, s[6:7]
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, ttmp5, s[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0xe3,0x18,0x00]
+
+v_perm_pk16_b4_u4 v[2:3], s4, v5, v[6:7]
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], s4, v5, v[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0a,0x1a,0x04]
+
+v_perm_pk16_b4_u4 v[2:3], v4, v5, 100
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, 0x64 ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+v_perm_pk16_b4_u4 v[2:3], v4, v5, 4
+// GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, 4 ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0x12,0x02]
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[8:9], v[6:7]
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[8:9], v[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x11,0x1a,0x04]
+
+v_perm_pk16_b6_u4 v[2:4], v4, ttmp[4:5], s[6:7]
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, ttmp[4:5], s[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0xe1,0x18,0x00]
+
+v_perm_pk16_b6_u4 v[2:4], s4, v[4:5], v[6:7]
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], s4, v[4:5], v[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x08,0x1a,0x04]
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 100
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 0x64 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 4
+// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 4 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x12,0x02]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], v[8:9], v[6:7]
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[8:9], v[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x11,0x1a,0x04]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], ttmp[4:5], s[6:7]
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], ttmp[4:5], s[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0xe1,0x18,0x00]
+
+v_perm_pk16_b8_u4 v[2:5], s[4:5], v[4:5], v[6:7]
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], s[4:5], v[4:5], v[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x08,0x1a,0x04]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 100
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 0x64 ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4
+// GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4 ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x09,0x12,0x02]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1
+// GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1 ; encoding: [0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2
+// GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2 ; encoding: [0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3
+// GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3 ; encoding: [0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4 ; encoding: [0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 ; encoding: [0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4
+// GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4 ; encoding: [0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 ; encoding: [0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5
+// GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5 ; encoding: [0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd2,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd2,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd0,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], 100.0
+// GFX1250: v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], 0x42c80000 ; encoding: [0x0a,0x00,0xce,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], v8
+// GFX1250: v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], v8 ; encoding: [0x0a,0x00,0xce,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd1,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd1,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], 100.0
+// GFX1250: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xcf,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], v8
+// GFX1250: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xcf,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], 100.0
+// GFX1250: v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], 0x42c80000 ; encoding: [0x0a,0x00,0xcd,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], v8
+// GFX1250: v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], v8 ; encoding: [0x0a,0x00,0xcd,0xd6,0x14,0x11,0x02,0x00]
+
+v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd8,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd8,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd6,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd6,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd7,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd7,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd5,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd5,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], v4, v8 ; encoding: [0x0a,0x00,0xd4,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd4,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], v4, v8
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], v4, v8 ; encoding: [0x0a,0x00,0xd3,0xd6,0x14,0x09,0x22,0x04]
+
+v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], s4, 100.0
+// GFX1250: v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd3,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
index ce8cfcb..4b44c27 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt
@@ -820,3 +820,402 @@
0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00
# GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 ; encoding: [0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00]
+
+0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01
+# GFX1250: v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01]
+
+0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03
+# GFX1250: v_permlane_bcast_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03]
+
+0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01
+# GFX1250: v_permlane_bcast_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01]
+
+0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01
+# GFX1250: v_permlane_bcast_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01]
+
+0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00
+# GFX1250: v_permlane_bcast_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00]
+
+0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01
+# GFX1250: v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01]
+
+0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01
+# GFX1250: v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01]
+
+0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01
+# GFX1250: v_permlane_bcast_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01]
+
+0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01
+# GFX1250: v_permlane_down_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01]
+
+0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03
+# GFX1250: v_permlane_down_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03]
+
+0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01
+# GFX1250: v_permlane_down_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01]
+
+0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01
+# GFX1250: v_permlane_down_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01]
+
+0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00
+# GFX1250: v_permlane_down_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00]
+
+0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01
+# GFX1250: v_permlane_down_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01]
+
+0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01
+# GFX1250: v_permlane_down_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01]
+
+0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01
+# GFX1250: v_permlane_down_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01]
+
+0x05,0x00,0x71,0xd6,0x01,0xff,0xa8,0x01
+# GFX1250: v_permlane_up_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xff,0xa8,0x01]
+
+0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03
+# GFX1250: v_permlane_up_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03]
+
+0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01
+# GFX1250: v_permlane_up_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01]
+
+0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01
+# GFX1250: v_permlane_up_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01]
+
+0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00
+# GFX1250: v_permlane_up_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00]
+
+0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01
+# GFX1250: v_permlane_up_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01]
+
+0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01
+# GFX1250: v_permlane_up_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01]
+
+0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01
+# GFX1250: v_permlane_up_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01]
+
+0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01
+# GFX1250: v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01]
+
+0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03
+# GFX1250: v_permlane_xor_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03]
+
+0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01
+# GFX1250: v_permlane_xor_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01]
+
+0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01
+# GFX1250: v_permlane_xor_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01]
+
+0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00
+# GFX1250: v_permlane_xor_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00]
+
+0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01
+# GFX1250: v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01]
+
+0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01
+# GFX1250: v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01]
+
+0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01
+# GFX1250: v_permlane_xor_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01]
+
+0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, s105 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00]
+
+0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00
+# GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00]
+
+0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc2,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc2,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc2,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_bf8_bf16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc2,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xc1,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc1,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc1,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_bf8_f16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc1,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0x99,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x99,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0x99,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_bf8_f32 v[10:11], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x99,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xbc,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xbc,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xbc,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_fp4_bf16 v10, v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xbc,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xb9,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xb9,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xb9,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_fp4_f16 v10, v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xb9,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0x97,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x97,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0x97,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_fp4_f32 v10, v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x97,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xc0,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xc0,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xc0,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_fp8_bf16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xc0,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xbf,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xbf,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xbf,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_fp8_f16 v[10:11], v[20:23], v4, v8 ; encoding: [0x0a,0x00,0xbf,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0x98,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0x98,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0x98,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk8_fp8_f32 v[10:11], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0x98,0xd6,0x14,0x09,0x22,0x04]
+
+0x02,0x00,0x3f,0xd6,0x04,0x0a,0x1a,0x04
+# GFX1250: v_perm_pk16_b4_u4 v[2:3], s4, v5, v[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0a,0x1a,0x04]
+
+0x02,0x00,0x3f,0xd6,0x04,0xe3,0x18,0x00
+# GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, ttmp5, s[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0xe3,0x18,0x00]
+
+0x02,0x00,0x3f,0xd6,0x04,0x0b,0xfe,0x03,0x64,0x00,0x00,0x00
+# GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, 0x64 ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+0x02,0x00,0x3f,0xd6,0x04,0x0b,0x12,0x02
+# GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, 4 ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0x12,0x02]
+
+0x02,0x00,0x3f,0xd6,0x04,0x0b,0x1a,0x04
+# GFX1250: v_perm_pk16_b4_u4 v[2:3], v4, v5, v[6:7] ; encoding: [0x02,0x00,0x3f,0xd6,0x04,0x0b,0x1a,0x04]
+
+0x02,0x00,0x42,0xd6,0x04,0x08,0x1a,0x04
+# GFX1250: v_perm_pk16_b6_u4 v[2:4], s4, v[4:5], v[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x08,0x1a,0x04]
+
+0x02,0x00,0x42,0xd6,0x04,0xe1,0x18,0x00
+# GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, ttmp[4:5], s[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0xe1,0x18,0x00]
+
+0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00
+# GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 0x64 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+0x02,0x00,0x42,0xd6,0x04,0x09,0x12,0x02
+# GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 4 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x12,0x02]
+
+0x02,0x00,0x42,0xd6,0x04,0x11,0x1a,0x04
+# GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[8:9], v[6:7] ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x11,0x1a,0x04]
+
+0x02,0x00,0x43,0xd6,0x04,0x08,0x1a,0x04
+# GFX1250: v_perm_pk16_b8_u4 v[2:5], s[4:5], v[4:5], v[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x08,0x1a,0x04]
+
+0x02,0x00,0x43,0xd6,0x04,0xe1,0x18,0x00
+# GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], ttmp[4:5], s[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0xe1,0x18,0x00]
+
+0x02,0x00,0x43,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00
+# GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 0x64 ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x09,0xfe,0x03,0x64,0x00,0x00,0x00]
+
+0x02,0x00,0x43,0xd6,0x04,0x09,0x12,0x02
+# GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[4:5], 4 ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x09,0x12,0x02]
+
+0x02,0x00,0x43,0xd6,0x04,0x11,0x1a,0x04
+# GFX1250: v_perm_pk16_b8_u4 v[2:5], v[4:5], v[8:9], v[6:7] ; encoding: [0x02,0x00,0x43,0xd6,0x04,0x11,0x1a,0x04]
+
+0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xcb,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_bf6 v[10:17], v[20:22], v8 scale_sel:1 ; encoding: [0x0a,0x08,0xcb,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc8,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_bf16_fp6 v[10:17], v[20:22], v8 scale_sel:3 ; encoding: [0x0a,0x18,0xc8,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xca,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_bf6 v[10:17], v[20:22], v8 scale_sel:2 ; encoding: [0x0a,0x10,0xca,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc7,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 ; encoding: [0x0a,0x00,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f16_fp6 v[10:17], v[20:22], v8 scale_sel:4 ; encoding: [0x0a,0x20,0xc7,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xcc,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 ; encoding: [0x0a,0x00,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_bf6 v[10:25], v[20:22], v8 scale_sel:5 ; encoding: [0x0a,0x28,0xcc,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00
+# GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], 0xcf00 ; encoding: [0x0a,0x00,0xc9,0xd6,0x14,0xff,0x01,0x00,0x00,0xcf,0x00,0x00]
+
+0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 ; encoding: [0x0a,0x00,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scale_pk16_f32_fp6 v[10:25], v[20:22], v8 scale_sel:4 ; encoding: [0x0a,0x20,0xc9,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xd2,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd2,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd2,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk16_bf6_bf16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd2,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xd0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd0,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk16_bf6_f16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd0,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xce,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], 0x42c80000 ; encoding: [0x0a,0x00,0xce,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xce,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk16_bf6_f32 v[10:12], v[20:35], v8 ; encoding: [0x0a,0x00,0xce,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xd1,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xd1,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd1,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk16_fp6_bf16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xd1,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xcf,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xcf,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xcf,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk16_fp6_f16 v[10:12], v[20:27], v8 ; encoding: [0x0a,0x00,0xcf,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xcd,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], 0x42c80000 ; encoding: [0x0a,0x00,0xcd,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xcd,0xd6,0x14,0x11,0x02,0x00
+# GFX1250: v_cvt_scalef32_pk16_fp6_f32 v[10:12], v[20:35], v8 ; encoding: [0x0a,0x00,0xcd,0xd6,0x14,0x11,0x02,0x00]
+
+0x0a,0x00,0xd8,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd8,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd8,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk16_bf6_bf16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd8,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xd6,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd6,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd6,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk16_bf6_f16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd6,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xd4,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd4,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd4,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk16_bf6_f32 v[10:12], v[20:35], v4, v8 ; encoding: [0x0a,0x00,0xd4,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xd7,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd7,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd7,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk16_fp6_bf16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd7,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xd5,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd5,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd5,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk16_fp6_f16 v[10:12], v[20:27], v4, v8 ; encoding: [0x0a,0x00,0xd5,0xd6,0x14,0x09,0x22,0x04]
+
+0x0a,0x00,0xd3,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42
+# GFX1250: v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], s4, 0x42c80000 ; encoding: [0x0a,0x00,0xd3,0xd6,0x14,0x09,0xfc,0x03,0x00,0x00,0xc8,0x42]
+
+0x0a,0x00,0xd3,0xd6,0x14,0x09,0x22,0x04
+# GFX1250: v_cvt_scalef32_sr_pk16_fp6_f32 v[10:12], v[20:35], v4, v8 ; encoding: [0x0a,0x00,0xd3,0xd6,0x14,0x09,0x22,0x04]
diff --git a/llvm/test/MC/ELF/many-instructions.s b/llvm/test/MC/ELF/many-instructions.s
new file mode 100644
index 0000000..cbdb2a7
--- /dev/null
+++ b/llvm/test/MC/ELF/many-instructions.s
@@ -0,0 +1,10 @@
+# REQUIRES: asserts
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o /dev/null -debug-only=mc-dump 2>&1 | FileCheck %s
+
+## Test that encodeInstruction may cause a new fragment to be created.
+# CHECK: 0 Data Size:16200
+# CHECK: 16200 Data Size:180
+
+.rept 16384/10
+movabsq $foo, %rax
+.endr
diff --git a/llvm/test/MC/ELF/mc-dump.s b/llvm/test/MC/ELF/mc-dump.s
index fd6cf95..51b3ff4 100644
--- a/llvm/test/MC/ELF/mc-dump.s
+++ b/llvm/test/MC/ELF/mc-dump.s
@@ -1,5 +1,5 @@
# REQUIRES: asserts
-# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t -debug-only=mc-dump-pre,mc-dump 2>&1 | FileCheck %s --match-full-lines --strict-whitespace
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t -debug-only=mc-dump-pre,mc-dump -stats 2>&1 | FileCheck %s --match-full-lines --strict-whitespace
#CHECK-LABEL:assembler backend - pre-layout
# CHECK:MCSection Name:.text
@@ -30,6 +30,9 @@
# CHECK-NEXT:5 LEB Size:0+1 [15] Value:.Ltmp0-_start Signed:0
# CHECK:]
+# CHECK: 2 assembler - Number of fixup evaluations for relaxation
+# CHECK: 8 assembler - Number of fixups
+
# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t -debug-only=mc-dump -save-temp-labels -g 2>&1 | FileCheck %s --check-prefix=CHECK2
# CHECK2:5 Data Size:16 [48,8b,04,25,00,00,00,00,48,8b,04,25,00,00,00,00]
diff --git a/llvm/test/Object/archive-darwin-duplicates.test b/llvm/test/Object/archive-darwin-duplicates.test
index 8642a31..f6aafcb 100644
--- a/llvm/test/Object/archive-darwin-duplicates.test
+++ b/llvm/test/Object/archive-darwin-duplicates.test
@@ -4,13 +4,13 @@ The two members with the duplicated name "test.o" must have unique
non-zero timestamps, while baz.o, being unique, remains a zero
timestamp.
-RUN: mkdir -p %T/sub1
-RUN: printf test > %T/test.o
-RUN: printf sub1/test > %T/sub1/test.o
-RUN: printf baz > %T/baz.o
+RUN: mkdir -p %t.dir/sub1
+RUN: printf test > %t.dir/test.o
+RUN: printf sub1/test > %t.dir/sub1/test.o
+RUN: printf baz > %t.dir/baz.o
RUN: rm -f %t.a
-RUN: llvm-ar --format=darwin rcs %t.a %T/sub1/test.o %T/test.o %T/baz.o
+RUN: llvm-ar --format=darwin rcs %t.a %t.dir/sub1/test.o %t.dir/test.o %t.dir/baz.o
RUN: FileCheck -strict-whitespace %s < %t.a
CHECK:#1/12 1 0 0 644 28 `
diff --git a/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td b/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td
index fa3484e..93525f7 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td
@@ -1,7 +1,7 @@
-// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %T/context-non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK_NOPT -input-file=%T/context-non-optimized.cpp
-// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %T/context-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK_OPT -input-file=%T/context-optimized.cpp
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %t.context-non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK_NOPT -input-file=%t.context-non-optimized.cpp
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %t.context-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK_OPT -input-file=%t.context-optimized.cpp
diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
index c3895b5..4a516c6 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
@@ -1,26 +1,26 @@
-// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %T/non-optimized.cpp
-// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %T/optimized.cpp
-// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common %s -o %T/default.cpp
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %t.non-optimized.cpp
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %t.optimized.cpp
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common %s -o %t.default.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19N -input-file=%T/non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19O -input-file=%T/optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19N -input-file=%t.non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19O -input-file=%t.optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21N -input-file=%T/non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21O -input-file=%T/optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21N -input-file=%t.non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21O -input-file=%t.optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20N -input-file=%T/non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20O -input-file=%T/optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20N -input-file=%t.non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20O -input-file=%t.optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00N -input-file=%T/non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00O -input-file=%T/optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00N -input-file=%t.non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00O -input-file=%t.optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01N -input-file=%T/non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01O -input-file=%T/optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01N -input-file=%t.non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01O -input-file=%t.optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02N,NOOPT -input-file=%T/non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02O -input-file=%T/optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02N,NOOPT -input-file=%t.non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02O -input-file=%t.optimized.cpp
-// RUN: diff %T/default.cpp %T/optimized.cpp
+// RUN: diff %t.default.cpp %t.optimized.cpp
include "llvm/Target/Target.td"
include "GlobalISelEmitterCommon.td"
diff --git a/llvm/test/TableGen/GlobalISelEmitter/HwModes.td b/llvm/test/TableGen/GlobalISelEmitter/HwModes.td
index f112577..04f6872 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/HwModes.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/HwModes.td
@@ -1,5 +1,5 @@
-// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %T/hwmode-non-optimized.cpp
-// RUN: FileCheck %s --check-prefixes=CHECK -input-file=%T/hwmode-non-optimized.cpp
+// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %t.hwmode-non-optimized.cpp
+// RUN: FileCheck %s --check-prefixes=CHECK -input-file=%t.hwmode-non-optimized.cpp
include "llvm/Target/Target.td"
diff --git a/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll b/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll
index be3663c..b18ad5d 100644
--- a/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll
+++ b/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll
@@ -1,30 +1,52 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes='require<phi-values>,gvn' -S < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='require<phi-values>,gvn' -S < %s | FileCheck %s --check-prefixes=CHECK-MEMDEP
+; RUN: opt -passes='require<phi-values>,gvn<memoryssa>' -S < %s | FileCheck %s --check-prefixes=CHECK-MEMSSA
declare noalias ptr @malloc(i64)
; Detecting that %s is fully redundant should let us detect that %w is partially
; redundant.
define void @fn1(ptr noalias %start, ptr %width, i32 %h) {
-; CHECK-LABEL: @fn1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
-; CHECK-NEXT: store ptr [[CALL]], ptr [[START:%.*]], align 8
-; CHECK-NEXT: br label [[PREHEADER:%.*]]
-; CHECK: preheader:
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[PREHEADER_BODY_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
-; CHECK: preheader.body_crit_edge:
-; CHECK-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH:%.*]], align 8
-; CHECK-NEXT: br label [[BODY:%.*]]
-; CHECK: body:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], [[BODY]] ]
-; CHECK-NEXT: store i32 0, ptr [[CALL]], align 4
-; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[BODY]], label [[PREHEADER]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; CHECK-MEMDEP-LABEL: define void @fn1(
+; CHECK-MEMDEP-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]]) {
+; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMDEP-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
+; CHECK-MEMDEP-NEXT: store ptr [[CALL]], ptr [[START]], align 8
+; CHECK-MEMDEP-NEXT: br label %[[PREHEADER:.*]]
+; CHECK-MEMDEP: [[PREHEADER]]:
+; CHECK-MEMDEP-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]]
+; CHECK-MEMDEP-NEXT: br i1 [[CMP]], label %[[PREHEADER_BODY_CRIT_EDGE:.*]], label %[[EXIT:.*]]
+; CHECK-MEMDEP: [[PREHEADER_BODY_CRIT_EDGE]]:
+; CHECK-MEMDEP-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH]], align 8
+; CHECK-MEMDEP-NEXT: br label %[[BODY:.*]]
+; CHECK-MEMDEP: [[BODY]]:
+; CHECK-MEMDEP-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], %[[BODY]] ]
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[CALL]], align 4
+; CHECK-MEMDEP-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
+; CHECK-MEMDEP-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]]
+; CHECK-MEMDEP-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]]
+; CHECK-MEMDEP: [[EXIT]]:
+; CHECK-MEMDEP-NEXT: ret void
+;
+; CHECK-MEMSSA-LABEL: define void @fn1(
+; CHECK-MEMSSA-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]]) {
+; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMSSA-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
+; CHECK-MEMSSA-NEXT: store ptr [[CALL]], ptr [[START]], align 8
+; CHECK-MEMSSA-NEXT: br label %[[PREHEADER:.*]]
+; CHECK-MEMSSA: [[PREHEADER]]:
+; CHECK-MEMSSA-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]]
+; CHECK-MEMSSA-NEXT: br i1 [[CMP]], label %[[BODY:.*]], label %[[EXIT:.*]]
+; CHECK-MEMSSA: [[BODY]]:
+; CHECK-MEMSSA-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER]] ], [ [[J_NEXT:%.*]], %[[BODY]] ]
+; CHECK-MEMSSA-NEXT: [[S:%.*]] = load ptr, ptr [[START]], align 8
+; CHECK-MEMSSA-NEXT: store i32 0, ptr [[S]], align 4
+; CHECK-MEMSSA-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
+; CHECK-MEMSSA-NEXT: [[W:%.*]] = load i32, ptr [[WIDTH]], align 8
+; CHECK-MEMSSA-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W]]
+; CHECK-MEMSSA-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]]
+; CHECK-MEMSSA: [[EXIT]]:
+; CHECK-MEMSSA-NEXT: ret void
;
entry:
%call = tail call noalias ptr @malloc(i64 1024)
@@ -52,33 +74,61 @@ exit:
; %w is partially redundant requires alias analysis that can analyze those
; values.
define void @fn2(ptr noalias %start, ptr %width, i32 %h, i32 %arg) {
-; CHECK-LABEL: @fn2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
-; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ARG:%.*]], 0
-; CHECK-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK: if:
-; CHECK-NEXT: store ptr [[CALL]], ptr [[START:%.*]], align 8
-; CHECK-NEXT: br label [[PREHEADER:%.*]]
-; CHECK: else:
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i32 [[ARG]]
-; CHECK-NEXT: store ptr [[GEP]], ptr [[START]], align 8
-; CHECK-NEXT: br label [[PREHEADER]]
-; CHECK: preheader:
-; CHECK-NEXT: [[S:%.*]] = phi ptr [ [[S]], [[BODY:%.*]] ], [ [[GEP]], [[ELSE]] ], [ [[CALL]], [[IF]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[PREHEADER_BODY_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
-; CHECK: preheader.body_crit_edge:
-; CHECK-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH:%.*]], align 8
-; CHECK-NEXT: br label [[BODY]]
-; CHECK: body:
-; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], [[BODY]] ]
-; CHECK-NEXT: store i32 0, ptr [[S]], align 4
-; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[BODY]], label [[PREHEADER]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; CHECK-MEMDEP-LABEL: define void @fn2(
+; CHECK-MEMDEP-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]], i32 [[ARG:%.*]]) {
+; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMDEP-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
+; CHECK-MEMDEP-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ARG]], 0
+; CHECK-MEMDEP-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[ELSE:.*]]
+; CHECK-MEMDEP: [[IF]]:
+; CHECK-MEMDEP-NEXT: store ptr [[CALL]], ptr [[START]], align 8
+; CHECK-MEMDEP-NEXT: br label %[[PREHEADER:.*]]
+; CHECK-MEMDEP: [[ELSE]]:
+; CHECK-MEMDEP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i32 [[ARG]]
+; CHECK-MEMDEP-NEXT: store ptr [[GEP]], ptr [[START]], align 8
+; CHECK-MEMDEP-NEXT: br label %[[PREHEADER]]
+; CHECK-MEMDEP: [[PREHEADER]]:
+; CHECK-MEMDEP-NEXT: [[S:%.*]] = phi ptr [ [[S]], %[[BODY:.*]] ], [ [[GEP]], %[[ELSE]] ], [ [[CALL]], %[[IF]] ]
+; CHECK-MEMDEP-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]]
+; CHECK-MEMDEP-NEXT: br i1 [[CMP]], label %[[PREHEADER_BODY_CRIT_EDGE:.*]], label %[[EXIT:.*]]
+; CHECK-MEMDEP: [[PREHEADER_BODY_CRIT_EDGE]]:
+; CHECK-MEMDEP-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH]], align 8
+; CHECK-MEMDEP-NEXT: br label %[[BODY]]
+; CHECK-MEMDEP: [[BODY]]:
+; CHECK-MEMDEP-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], %[[BODY]] ]
+; CHECK-MEMDEP-NEXT: store i32 0, ptr [[S]], align 4
+; CHECK-MEMDEP-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
+; CHECK-MEMDEP-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]]
+; CHECK-MEMDEP-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]]
+; CHECK-MEMDEP: [[EXIT]]:
+; CHECK-MEMDEP-NEXT: ret void
+;
+; CHECK-MEMSSA-LABEL: define void @fn2(
+; CHECK-MEMSSA-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]], i32 [[ARG:%.*]]) {
+; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]]
+; CHECK-MEMSSA-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024)
+; CHECK-MEMSSA-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ARG]], 0
+; CHECK-MEMSSA-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[ELSE:.*]]
+; CHECK-MEMSSA: [[IF]]:
+; CHECK-MEMSSA-NEXT: store ptr [[CALL]], ptr [[START]], align 8
+; CHECK-MEMSSA-NEXT: br label %[[PREHEADER:.*]]
+; CHECK-MEMSSA: [[ELSE]]:
+; CHECK-MEMSSA-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i32 [[ARG]]
+; CHECK-MEMSSA-NEXT: store ptr [[GEP]], ptr [[START]], align 8
+; CHECK-MEMSSA-NEXT: br label %[[PREHEADER]]
+; CHECK-MEMSSA: [[PREHEADER]]:
+; CHECK-MEMSSA-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]]
+; CHECK-MEMSSA-NEXT: br i1 [[CMP]], label %[[BODY:.*]], label %[[EXIT:.*]]
+; CHECK-MEMSSA: [[BODY]]:
+; CHECK-MEMSSA-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER]] ], [ [[J_NEXT:%.*]], %[[BODY]] ]
+; CHECK-MEMSSA-NEXT: [[S:%.*]] = load ptr, ptr [[START]], align 8
+; CHECK-MEMSSA-NEXT: store i32 0, ptr [[S]], align 4
+; CHECK-MEMSSA-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
+; CHECK-MEMSSA-NEXT: [[W:%.*]] = load i32, ptr [[WIDTH]], align 8
+; CHECK-MEMSSA-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W]]
+; CHECK-MEMSSA-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]]
+; CHECK-MEMSSA: [[EXIT]]:
+; CHECK-MEMSSA-NEXT: ret void
;
entry:
%call = tail call noalias ptr @malloc(i64 1024)
diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll
index c81c1fe..e495163 100644
--- a/llvm/test/Transforms/GVN/PRE/rle.ll
+++ b/llvm/test/Transforms/GVN/PRE/rle.ll
@@ -1,12 +1,32 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,LE
-; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,BE
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,LE,LE-MEMDEP
+; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes='gvn<memoryssa>',dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,LE,LE-MEMSSA
+; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,BE,BE-MEMDEP
+; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes='gvn<memoryssa>',dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,BE,BE-MEMSSA
;; Trivial RLE test.
define i32 @test0(i32 %V, ptr %P) {
-; CHECK-LABEL: @test0(
-; CHECK-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: ret i32 [[V]]
+; LE-MEMDEP-LABEL: define i32 @test0(
+; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: ret i32 [[V]]
+;
+; LE-MEMSSA-LABEL: define i32 @test0(
+; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: ret i32 [[A]]
+;
+; BE-MEMDEP-LABEL: define i32 @test0(
+; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: ret i32 [[V]]
+;
+; BE-MEMSSA-LABEL: define i32 @test0(
+; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: ret i32 [[A]]
;
store i32 %V, ptr %P
@@ -21,8 +41,9 @@ define i32 @test0(i32 %V, ptr %P) {
;; PR5016
define i8 @crash0({i32, i32} %A, ptr %P) {
-; CHECK-LABEL: @crash0(
-; CHECK-NEXT: store { i32, i32 } [[A:%.*]], ptr [[P:%.*]], align 4
+; CHECK-LABEL: define i8 @crash0(
+; CHECK-SAME: { i32, i32 } [[A:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: store { i32, i32 } [[A]], ptr [[P]], align 4
; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[P]], align 1
; CHECK-NEXT: ret i8 [[Y]]
;
@@ -34,7 +55,7 @@ define i8 @crash0({i32, i32} %A, ptr %P) {
;; No PR filed, crashed in CaptureTracker.
declare void @helper()
define void @crash1() {
-; CHECK-LABEL: @crash1(
+; CHECK-LABEL: define void @crash1() {
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr undef, ptr undef, i64 undef, i1 false) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: ret void
;
@@ -52,10 +73,29 @@ define void @crash1() {
;; i32 -> f32 forwarding.
define float @coerce_mustalias1(i32 %V, ptr %P) {
-; CHECK-LABEL: @coerce_mustalias1(
-; CHECK-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[V]] to float
-; CHECK-NEXT: ret float [[TMP1]]
+; LE-MEMDEP-LABEL: define float @coerce_mustalias1(
+; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[V]] to float
+; LE-MEMDEP-NEXT: ret float [[TMP1]]
+;
+; LE-MEMSSA-LABEL: define float @coerce_mustalias1(
+; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: ret float [[A]]
+;
+; BE-MEMDEP-LABEL: define float @coerce_mustalias1(
+; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[V]] to float
+; BE-MEMDEP-NEXT: ret float [[TMP1]]
+;
+; BE-MEMSSA-LABEL: define float @coerce_mustalias1(
+; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: ret float [[A]]
;
store i32 %V, ptr %P
@@ -66,11 +106,31 @@ define float @coerce_mustalias1(i32 %V, ptr %P) {
;; ptr -> float forwarding.
define float @coerce_mustalias2(ptr %V, ptr %P) {
-; CHECK-LABEL: @coerce_mustalias2(
-; CHECK-NEXT: store ptr [[V:%.*]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
-; CHECK-NEXT: ret float [[TMP2]]
+; LE-MEMDEP-LABEL: define float @coerce_mustalias2(
+; LE-MEMDEP-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store ptr [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; LE-MEMDEP-NEXT: ret float [[TMP2]]
+;
+; LE-MEMSSA-LABEL: define float @coerce_mustalias2(
+; LE-MEMSSA-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store ptr [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: ret float [[A]]
+;
+; BE-MEMDEP-LABEL: define float @coerce_mustalias2(
+; BE-MEMDEP-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store ptr [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; BE-MEMDEP-NEXT: ret float [[TMP2]]
+;
+; BE-MEMSSA-LABEL: define float @coerce_mustalias2(
+; BE-MEMSSA-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store ptr [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: ret float [[A]]
;
store ptr %V, ptr %P
@@ -81,11 +141,31 @@ define float @coerce_mustalias2(ptr %V, ptr %P) {
;; float -> ptr forwarding.
define ptr @coerce_mustalias3(float %V, ptr %P) {
-; CHECK-LABEL: @coerce_mustalias3(
-; CHECK-NEXT: store float [[V:%.*]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr
-; CHECK-NEXT: ret ptr [[TMP2]]
+; LE-MEMDEP-LABEL: define ptr @coerce_mustalias3(
+; LE-MEMDEP-SAME: float [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store float [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr
+; LE-MEMDEP-NEXT: ret ptr [[TMP2]]
+;
+; LE-MEMSSA-LABEL: define ptr @coerce_mustalias3(
+; LE-MEMSSA-SAME: float [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store float [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: ret ptr [[A]]
+;
+; BE-MEMDEP-LABEL: define ptr @coerce_mustalias3(
+; BE-MEMDEP-SAME: float [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store float [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr
+; BE-MEMDEP-NEXT: ret ptr [[TMP2]]
+;
+; BE-MEMSSA-LABEL: define ptr @coerce_mustalias3(
+; BE-MEMSSA-SAME: float [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store float [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: ret ptr [[A]]
;
store float %V, ptr %P
@@ -96,14 +176,47 @@ define ptr @coerce_mustalias3(float %V, ptr %P) {
;; i32 -> f32 load forwarding.
define float @coerce_mustalias4(ptr %P, i1 %cond) {
-; CHECK-LABEL: @coerce_mustalias4(
-; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[A]] to float
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; CHECK: T:
-; CHECK-NEXT: ret float [[TMP1]]
-; CHECK: F:
-; CHECK-NEXT: ret float [[TMP1]]
+; LE-MEMDEP-LABEL: define float @coerce_mustalias4(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[A]] to float
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: ret float [[TMP1]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: ret float [[TMP1]]
+;
+; LE-MEMSSA-LABEL: define float @coerce_mustalias4(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[B:%.*]] = load float, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: ret float [[B]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: [[X:%.*]] = bitcast i32 [[A]] to float
+; LE-MEMSSA-NEXT: ret float [[X]]
+;
+; BE-MEMDEP-LABEL: define float @coerce_mustalias4(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[A]] to float
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: ret float [[TMP1]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: ret float [[TMP1]]
+;
+; BE-MEMSSA-LABEL: define float @coerce_mustalias4(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[B:%.*]] = load float, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: ret float [[B]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: [[X:%.*]] = bitcast i32 [[A]] to float
+; BE-MEMSSA-NEXT: ret float [[X]]
;
%A = load i32, ptr %P
@@ -120,16 +233,30 @@ F:
;; i32 -> i8 forwarding
define i8 @coerce_mustalias5(i32 %V, ptr %P) {
-; LE-LABEL: @coerce_mustalias5(
-; LE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4
-; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8
-; LE-NEXT: ret i8 [[TMP1]]
+; LE-MEMDEP-LABEL: define i8 @coerce_mustalias5(
+; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8
+; LE-MEMDEP-NEXT: ret i8 [[TMP1]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_mustalias5(
+; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_mustalias5(
+; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 24
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; BE-MEMDEP-NEXT: ret i8 [[TMP2]]
;
-; BE-LABEL: @coerce_mustalias5(
-; BE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4
-; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 24
-; BE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-; BE-NEXT: ret i8 [[TMP2]]
+; BE-MEMSSA-LABEL: define i8 @coerce_mustalias5(
+; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
store i32 %V, ptr %P
@@ -140,18 +267,32 @@ define i8 @coerce_mustalias5(i32 %V, ptr %P) {
;; i64 -> float forwarding
define float @coerce_mustalias6(i64 %V, ptr %P) {
-; LE-LABEL: @coerce_mustalias6(
-; LE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4
-; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32
-; LE-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
-; LE-NEXT: ret float [[TMP2]]
-;
-; BE-LABEL: @coerce_mustalias6(
-; BE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4
-; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32
-; BE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; BE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
-; BE-NEXT: ret float [[TMP3]]
+; LE-MEMDEP-LABEL: define float @coerce_mustalias6(
+; LE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; LE-MEMDEP-NEXT: ret float [[TMP2]]
+;
+; LE-MEMSSA-LABEL: define float @coerce_mustalias6(
+; LE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: ret float [[A]]
+;
+; BE-MEMDEP-LABEL: define float @coerce_mustalias6(
+; BE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; BE-MEMDEP-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; BE-MEMDEP-NEXT: ret float [[TMP3]]
+;
+; BE-MEMSSA-LABEL: define float @coerce_mustalias6(
+; BE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: ret float [[A]]
;
store i64 %V, ptr %P
@@ -162,18 +303,32 @@ define float @coerce_mustalias6(i64 %V, ptr %P) {
;; i64 -> ptr (32-bit) forwarding
define ptr @coerce_mustalias7(i64 %V, ptr %P) {
-; LE-LABEL: @coerce_mustalias7(
-; LE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4
-; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32
-; LE-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr
-; LE-NEXT: ret ptr [[TMP2]]
-;
-; BE-LABEL: @coerce_mustalias7(
-; BE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4
-; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32
-; BE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; BE-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr
-; BE-NEXT: ret ptr [[TMP3]]
+; LE-MEMDEP-LABEL: define ptr @coerce_mustalias7(
+; LE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr
+; LE-MEMDEP-NEXT: ret ptr [[TMP2]]
+;
+; LE-MEMSSA-LABEL: define ptr @coerce_mustalias7(
+; LE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: ret ptr [[A]]
+;
+; BE-MEMDEP-LABEL: define ptr @coerce_mustalias7(
+; BE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; BE-MEMDEP-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr
+; BE-MEMDEP-NEXT: ret ptr [[TMP3]]
+;
+; BE-MEMSSA-LABEL: define ptr @coerce_mustalias7(
+; BE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: ret ptr [[A]]
;
store i64 %V, ptr %P
@@ -184,10 +339,33 @@ define ptr @coerce_mustalias7(i64 %V, ptr %P) {
; memset -> i16 forwarding.
define signext i16 @memset_to_i16_local(ptr %A) nounwind ssp {
-; CHECK-LABEL: @memset_to_i16_local(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A:%.*]], i8 1, i64 200, i1 false)
-; CHECK-NEXT: ret i16 257
+; LE-MEMDEP-LABEL: define signext i16 @memset_to_i16_local(
+; LE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false)
+; LE-MEMDEP-NEXT: ret i16 257
+;
+; LE-MEMSSA-LABEL: define signext i16 @memset_to_i16_local(
+; LE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false)
+; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 42
+; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; LE-MEMSSA-NEXT: ret i16 [[TTMP2]]
+;
+; BE-MEMDEP-LABEL: define signext i16 @memset_to_i16_local(
+; BE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false)
+; BE-MEMDEP-NEXT: ret i16 257
+;
+; BE-MEMSSA-LABEL: define signext i16 @memset_to_i16_local(
+; BE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false)
+; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 42
+; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; BE-MEMSSA-NEXT: ret i16 [[TTMP2]]
;
entry:
tail call void @llvm.memset.p0.i64(ptr %A, i8 1, i64 200, i1 false)
@@ -198,16 +376,45 @@ entry:
; memset -> float forwarding.
define float @memset_to_float_local(ptr %A, i8 %Val) nounwind ssp {
-; CHECK-LABEL: @memset_to_float_local(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A:%.*]], i8 [[VAL:%.*]], i64 400, i1 false)
-; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[VAL]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
-; CHECK-NEXT: ret float [[TMP5]]
+; LE-MEMDEP-LABEL: define float @memset_to_float_local(
+; LE-MEMDEP-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false)
+; LE-MEMDEP-NEXT: [[TMP0:%.*]] = zext i8 [[VAL]] to i32
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 8
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; LE-MEMDEP-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16
+; LE-MEMDEP-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
+; LE-MEMDEP-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; LE-MEMDEP-NEXT: ret float [[TMP5]]
+;
+; LE-MEMSSA-LABEL: define float @memset_to_float_local(
+; LE-MEMSSA-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false)
+; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 42
+; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; LE-MEMSSA-NEXT: ret float [[TTMP2]]
+;
+; BE-MEMDEP-LABEL: define float @memset_to_float_local(
+; BE-MEMDEP-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false)
+; BE-MEMDEP-NEXT: [[TMP0:%.*]] = zext i8 [[VAL]] to i32
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 8
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; BE-MEMDEP-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16
+; BE-MEMDEP-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
+; BE-MEMDEP-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
+; BE-MEMDEP-NEXT: ret float [[TMP5]]
+;
+; BE-MEMSSA-LABEL: define float @memset_to_float_local(
+; BE-MEMSSA-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false)
+; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 42
+; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; BE-MEMSSA-NEXT: ret float [[TTMP2]]
;
entry:
tail call void @llvm.memset.p0.i64(ptr %A, i8 %Val, i64 400, i1 false)
@@ -218,17 +425,59 @@ entry:
;; non-local memset -> i16 load forwarding.
define i16 @memset_to_i16_nonlocal0(ptr %P, i1 %cond) {
-; CHECK-LABEL: @memset_to_i16_nonlocal0(
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; CHECK: T:
-; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 400, i1 false)
-; CHECK-NEXT: br label [[CONT:%.*]]
-; CHECK: F:
-; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false)
-; CHECK-NEXT: br label [[CONT]]
-; CHECK: Cont:
-; CHECK-NEXT: [[A:%.*]] = phi i16 [ 514, [[F]] ], [ 257, [[T]] ]
-; CHECK-NEXT: ret i16 [[A]]
+; LE-MEMDEP-LABEL: define i16 @memset_to_i16_nonlocal0(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false)
+; LE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false)
+; LE-MEMDEP-NEXT: br label %[[CONT]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[A:%.*]] = phi i16 [ 514, %[[F]] ], [ 257, %[[T]] ]
+; LE-MEMDEP-NEXT: ret i16 [[A]]
+;
+; LE-MEMSSA-LABEL: define i16 @memset_to_i16_nonlocal0(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false)
+; LE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false)
+; LE-MEMSSA-NEXT: br label %[[CONT]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 4
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i16, ptr [[P2]], align 2
+; LE-MEMSSA-NEXT: ret i16 [[A]]
+;
+; BE-MEMDEP-LABEL: define i16 @memset_to_i16_nonlocal0(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false)
+; BE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false)
+; BE-MEMDEP-NEXT: br label %[[CONT]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[A:%.*]] = phi i16 [ 514, %[[F]] ], [ 257, %[[T]] ]
+; BE-MEMDEP-NEXT: ret i16 [[A]]
+;
+; BE-MEMSSA-LABEL: define i16 @memset_to_i16_nonlocal0(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false)
+; BE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false)
+; BE-MEMSSA-NEXT: br label %[[CONT]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 4
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i16, ptr [[P2]], align 2
+; BE-MEMSSA-NEXT: ret i16 [[A]]
;
br i1 %cond, label %T, label %F
T:
@@ -251,10 +500,33 @@ Cont:
; memset -> float forwarding.
define float @memcpy_to_float_local(ptr %A) nounwind ssp {
-; CHECK-LABEL: @memcpy_to_float_local(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A:%.*]], ptr @GCst, i64 12, i1 false)
-; CHECK-NEXT: ret float 1.400000e+01
+; LE-MEMDEP-LABEL: define float @memcpy_to_float_local(
+; LE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false)
+; LE-MEMDEP-NEXT: ret float 1.400000e+01
+;
+; LE-MEMSSA-LABEL: define float @memcpy_to_float_local(
+; LE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false)
+; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
+; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; LE-MEMSSA-NEXT: ret float [[TTMP2]]
+;
+; BE-MEMDEP-LABEL: define float @memcpy_to_float_local(
+; BE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false)
+; BE-MEMDEP-NEXT: ret float 1.400000e+01
+;
+; BE-MEMSSA-LABEL: define float @memcpy_to_float_local(
+; BE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false)
+; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
+; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; BE-MEMSSA-NEXT: ret float [[TTMP2]]
;
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %A, ptr @GCst, i64 12, i1 false)
@@ -265,10 +537,33 @@ entry:
; memcpy from address space 1
define float @memcpy_to_float_local_as1(ptr %A) nounwind ssp {
-; CHECK-LABEL: @memcpy_to_float_local_as1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A:%.*]], ptr addrspace(1) @GCst_as1, i64 12, i1 false)
-; CHECK-NEXT: ret float 1.400000e+01
+; LE-MEMDEP-LABEL: define float @memcpy_to_float_local_as1(
+; LE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false)
+; LE-MEMDEP-NEXT: ret float 1.400000e+01
+;
+; LE-MEMSSA-LABEL: define float @memcpy_to_float_local_as1(
+; LE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false)
+; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
+; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; LE-MEMSSA-NEXT: ret float [[TTMP2]]
+;
+; BE-MEMDEP-LABEL: define float @memcpy_to_float_local_as1(
+; BE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false)
+; BE-MEMDEP-NEXT: ret float 1.400000e+01
+;
+; BE-MEMSSA-LABEL: define float @memcpy_to_float_local_as1(
+; BE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false)
+; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
+; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; BE-MEMSSA-NEXT: ret float [[TTMP2]]
;
entry:
tail call void @llvm.memcpy.p0.p1.i64(ptr %A, ptr addrspace(1) @GCst_as1, i64 12, i1 false)
@@ -279,29 +574,57 @@ entry:
;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_mustalias_nonlocal0(ptr %P, i1 %cond) {
-; LE-LABEL: @coerce_mustalias_nonlocal0(
-; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; LE: T:
-; LE-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; LE-NEXT: br label [[CONT:%.*]]
-; LE: F:
-; LE-NEXT: store float 1.000000e+00, ptr [[P]], align 4
-; LE-NEXT: br label [[CONT]]
-; LE: Cont:
-; LE-NEXT: [[A:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ]
-; LE-NEXT: ret i8 [[A]]
-;
-; BE-LABEL: @coerce_mustalias_nonlocal0(
-; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; BE: T:
-; BE-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; BE-NEXT: br label [[CONT:%.*]]
-; BE: F:
-; BE-NEXT: store float 1.000000e+00, ptr [[P]], align 4
-; BE-NEXT: br label [[CONT]]
-; BE: Cont:
-; BE-NEXT: [[A:%.*]] = phi i8 [ 63, [[F]] ], [ 0, [[T]] ]
-; BE-NEXT: ret i8 [[A]]
+; LE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal0(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 0, %[[F]] ], [ 42, %[[T]] ]
+; LE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal0(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal0(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 63, %[[F]] ], [ 0, %[[T]] ]
+; BE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; BE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal0(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
br i1 %cond, label %T, label %F
T:
@@ -322,29 +645,57 @@ Cont:
;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
;; bitcast equivalence can be properly phi translated.
define i8 @coerce_mustalias_nonlocal1(ptr %P, i1 %cond) {
-; LE-LABEL: @coerce_mustalias_nonlocal1(
-; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; LE: T:
-; LE-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; LE-NEXT: br label [[CONT:%.*]]
-; LE: F:
-; LE-NEXT: store float 1.000000e+00, ptr [[P]], align 4
-; LE-NEXT: br label [[CONT]]
-; LE: Cont:
-; LE-NEXT: [[A:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ]
-; LE-NEXT: ret i8 [[A]]
-;
-; BE-LABEL: @coerce_mustalias_nonlocal1(
-; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; BE: T:
-; BE-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; BE-NEXT: br label [[CONT:%.*]]
-; BE: F:
-; BE-NEXT: store float 1.000000e+00, ptr [[P]], align 4
-; BE-NEXT: br label [[CONT]]
-; BE: Cont:
-; BE-NEXT: [[A:%.*]] = phi i8 [ 63, [[F]] ], [ 0, [[T]] ]
-; BE-NEXT: ret i8 [[A]]
+; LE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal1(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 0, %[[F]] ], [ 42, %[[T]] ]
+; LE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal1(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal1(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 63, %[[F]] ], [ 0, %[[T]] ]
+; BE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; BE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal1(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
br i1 %cond, label %T, label %F
T:
@@ -364,29 +715,55 @@ Cont:
;; non-local i32 -> i8 partial redundancy load forwarding.
define i8 @coerce_mustalias_pre0(ptr %P, i1 %cond) {
-; LE-LABEL: @coerce_mustalias_pre0(
-; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; LE: T:
-; LE-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; LE-NEXT: br label [[CONT:%.*]]
-; LE: F:
-; LE-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1
-; LE-NEXT: br label [[CONT]]
-; LE: Cont:
-; LE-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], [[F]] ], [ 42, [[T]] ]
-; LE-NEXT: ret i8 [[A]]
-;
-; BE-LABEL: @coerce_mustalias_pre0(
-; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; BE: T:
-; BE-NEXT: store i32 42, ptr [[P:%.*]], align 4
-; BE-NEXT: br label [[CONT:%.*]]
-; BE: F:
-; BE-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1
-; BE-NEXT: br label [[CONT]]
-; BE: Cont:
-; BE-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], [[F]] ], [ 0, [[T]] ]
-; BE-NEXT: ret i8 [[A]]
+; LE-MEMDEP-LABEL: define i8 @coerce_mustalias_pre0(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1
+; LE-MEMDEP-NEXT: br label %[[CONT]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 42, %[[T]] ]
+; LE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_mustalias_pre0(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: br label %[[CONT]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_mustalias_pre0(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1
+; BE-MEMDEP-NEXT: br label %[[CONT]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 0, %[[T]] ]
+; BE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; BE-MEMSSA-LABEL: define i8 @coerce_mustalias_pre0(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: br label %[[CONT]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
br i1 %cond, label %T, label %F
T:
@@ -410,17 +787,33 @@ Cont:
;; i32 -> i8 forwarding.
;; PR4216
define i8 @coerce_offset0(i32 %V, ptr %P) {
-; LE-LABEL: @coerce_offset0(
-; LE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4
-; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 16
-; LE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-; LE-NEXT: ret i8 [[TMP2]]
-;
-; BE-LABEL: @coerce_offset0(
-; BE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4
-; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 8
-; BE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-; BE-NEXT: ret i8 [[TMP2]]
+; LE-MEMDEP-LABEL: define i8 @coerce_offset0(
+; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 16
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; LE-MEMDEP-NEXT: ret i8 [[TMP2]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_offset0(
+; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P3]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_offset0(
+; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 8
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; BE-MEMDEP-NEXT: ret i8 [[TMP2]]
+;
+; BE-MEMSSA-LABEL: define i8 @coerce_offset0(
+; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P3]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
store i32 %V, ptr %P
@@ -432,29 +825,59 @@ define i8 @coerce_offset0(i32 %V, ptr %P) {
;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_offset_nonlocal0(ptr %P, i1 %cond) {
-; LE-LABEL: @coerce_offset_nonlocal0(
-; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; LE: T:
-; LE-NEXT: store i32 57005, ptr [[P:%.*]], align 4
-; LE-NEXT: br label [[CONT:%.*]]
-; LE: F:
-; LE-NEXT: store float 1.000000e+00, ptr [[P]], align 4
-; LE-NEXT: br label [[CONT]]
-; LE: Cont:
-; LE-NEXT: [[A:%.*]] = phi i8 [ -128, [[F]] ], [ 0, [[T]] ]
-; LE-NEXT: ret i8 [[A]]
-;
-; BE-LABEL: @coerce_offset_nonlocal0(
-; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; BE: T:
-; BE-NEXT: store i32 57005, ptr [[P:%.*]], align 4
-; BE-NEXT: br label [[CONT:%.*]]
-; BE: F:
-; BE-NEXT: store float 1.000000e+00, ptr [[P]], align 4
-; BE-NEXT: br label [[CONT]]
-; BE: Cont:
-; BE-NEXT: [[A:%.*]] = phi i8 [ 0, [[F]] ], [ -34, [[T]] ]
-; BE-NEXT: ret i8 [[A]]
+; LE-MEMDEP-LABEL: define i8 @coerce_offset_nonlocal0(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: store i32 57005, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ -128, %[[F]] ], [ 0, %[[T]] ]
+; LE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_offset_nonlocal0(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: store i32 57005, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_offset_nonlocal0(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: store i32 57005, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 0, %[[F]] ], [ -34, %[[T]] ]
+; BE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; BE-MEMSSA-LABEL: define i8 @coerce_offset_nonlocal0(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: store i32 57005, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
%P4 = getelementptr i8, ptr %P, i32 2
br i1 %cond, label %T, label %F
@@ -475,18 +898,59 @@ Cont:
;; non-local i32 -> i8 partial redundancy load forwarding.
define i8 @coerce_offset_pre0(ptr %P, i1 %cond) {
-; CHECK-LABEL: @coerce_offset_pre0(
-; CHECK-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
-; CHECK: T:
-; CHECK-NEXT: store i32 42, ptr [[P]], align 4
-; CHECK-NEXT: br label [[CONT:%.*]]
-; CHECK: F:
-; CHECK-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P4]], align 1
-; CHECK-NEXT: br label [[CONT]]
-; CHECK: Cont:
-; CHECK-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], [[F]] ], [ 0, [[T]] ]
-; CHECK-NEXT: ret i8 [[A]]
+; LE-MEMDEP-LABEL: define i8 @coerce_offset_pre0(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMDEP: [[T]]:
+; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; LE-MEMDEP: [[F]]:
+; LE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P4]], align 1
+; LE-MEMDEP-NEXT: br label %[[CONT]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 0, %[[T]] ]
+; LE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; LE-MEMSSA-LABEL: define i8 @coerce_offset_pre0(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; LE-MEMSSA: [[T]]:
+; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; LE-MEMSSA: [[F]]:
+; LE-MEMSSA-NEXT: br label %[[CONT]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1
+; LE-MEMSSA-NEXT: ret i8 [[A]]
+;
+; BE-MEMDEP-LABEL: define i8 @coerce_offset_pre0(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMDEP: [[T]]:
+; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[CONT:.*]]
+; BE-MEMDEP: [[F]]:
+; BE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P4]], align 1
+; BE-MEMDEP-NEXT: br label %[[CONT]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 0, %[[T]] ]
+; BE-MEMDEP-NEXT: ret i8 [[A]]
+;
+; BE-MEMSSA-LABEL: define i8 @coerce_offset_pre0(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]]
+; BE-MEMSSA: [[T]]:
+; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[CONT:.*]]
+; BE-MEMSSA: [[F]]:
+; BE-MEMSSA-NEXT: br label %[[CONT]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1
+; BE-MEMSSA-NEXT: ret i8 [[A]]
;
%P4 = getelementptr i8, ptr %P, i32 2
br i1 %cond, label %T, label %F
@@ -504,20 +968,71 @@ Cont:
}
define i32 @chained_load(ptr %p, i32 %x, i32 %y) {
-; CHECK-LABEL: @chained_load(
-; CHECK-NEXT: block1:
-; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 4
-; CHECK-NEXT: [[Z:%.*]] = load ptr, ptr [[P:%.*]], align 4
-; CHECK-NEXT: store ptr [[Z]], ptr [[A]], align 4
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: br i1 [[CMP]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
-; CHECK: block2:
-; CHECK-NEXT: br label [[BLOCK4:%.*]]
-; CHECK: block3:
-; CHECK-NEXT: br label [[BLOCK4]]
-; CHECK: block4:
-; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[Z]], align 4
-; CHECK-NEXT: ret i32 [[D]]
+; LE-MEMDEP-LABEL: define i32 @chained_load(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; LE-MEMDEP-NEXT: [[BLOCK1:.*:]]
+; LE-MEMDEP-NEXT: [[A:%.*]] = alloca ptr, align 4
+; LE-MEMDEP-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: store ptr [[Z]], ptr [[A]], align 4
+; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; LE-MEMDEP: [[BLOCK2]]:
+; LE-MEMDEP-NEXT: br label %[[BLOCK4:.*]]
+; LE-MEMDEP: [[BLOCK3]]:
+; LE-MEMDEP-NEXT: br label %[[BLOCK4]]
+; LE-MEMDEP: [[BLOCK4]]:
+; LE-MEMDEP-NEXT: [[D:%.*]] = load i32, ptr [[Z]], align 4
+; LE-MEMDEP-NEXT: ret i32 [[D]]
+;
+; LE-MEMSSA-LABEL: define i32 @chained_load(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; LE-MEMSSA-NEXT: [[BLOCK1:.*:]]
+; LE-MEMSSA-NEXT: [[A:%.*]] = alloca ptr, align 4
+; LE-MEMSSA-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: store ptr [[Z]], ptr [[A]], align 4
+; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; LE-MEMSSA: [[BLOCK2]]:
+; LE-MEMSSA-NEXT: br label %[[BLOCK4:.*]]
+; LE-MEMSSA: [[BLOCK3]]:
+; LE-MEMSSA-NEXT: br label %[[BLOCK4]]
+; LE-MEMSSA: [[BLOCK4]]:
+; LE-MEMSSA-NEXT: [[C:%.*]] = load ptr, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4
+; LE-MEMSSA-NEXT: ret i32 [[D]]
+;
+; BE-MEMDEP-LABEL: define i32 @chained_load(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; BE-MEMDEP-NEXT: [[BLOCK1:.*:]]
+; BE-MEMDEP-NEXT: [[A:%.*]] = alloca ptr, align 4
+; BE-MEMDEP-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: store ptr [[Z]], ptr [[A]], align 4
+; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; BE-MEMDEP: [[BLOCK2]]:
+; BE-MEMDEP-NEXT: br label %[[BLOCK4:.*]]
+; BE-MEMDEP: [[BLOCK3]]:
+; BE-MEMDEP-NEXT: br label %[[BLOCK4]]
+; BE-MEMDEP: [[BLOCK4]]:
+; BE-MEMDEP-NEXT: [[D:%.*]] = load i32, ptr [[Z]], align 4
+; BE-MEMDEP-NEXT: ret i32 [[D]]
+;
+; BE-MEMSSA-LABEL: define i32 @chained_load(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; BE-MEMSSA-NEXT: [[BLOCK1:.*:]]
+; BE-MEMSSA-NEXT: [[A:%.*]] = alloca ptr, align 4
+; BE-MEMSSA-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: store ptr [[Z]], ptr [[A]], align 4
+; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; BE-MEMSSA: [[BLOCK2]]:
+; BE-MEMSSA-NEXT: br label %[[BLOCK4:.*]]
+; BE-MEMSSA: [[BLOCK3]]:
+; BE-MEMSSA-NEXT: br label %[[BLOCK4]]
+; BE-MEMSSA: [[BLOCK4]]:
+; BE-MEMSSA-NEXT: [[C:%.*]] = load ptr, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4
+; BE-MEMSSA-NEXT: ret i32 [[D]]
;
block1:
%A = alloca ptr
@@ -547,27 +1062,27 @@ declare i1 @cond() readonly
declare i1 @cond2() readonly
define i32 @phi_trans2() {
-; CHECK-LABEL: @phi_trans2(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @phi_trans2() {
+; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[P:%.*]] = alloca i32, i32 400, align 4
-; CHECK-NEXT: br label [[F1:%.*]]
-; CHECK: F1:
-; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 2, [[F:%.*]] ]
+; CHECK-NEXT: br label %[[F1:.*]]
+; CHECK: [[F1]]:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[F:.*]] ]
; CHECK-NEXT: [[COND2:%.*]] = call i1 @cond()
-; CHECK-NEXT: br i1 [[COND2]], label [[T1:%.*]], label [[TY:%.*]]
-; CHECK: T1:
+; CHECK-NEXT: br i1 [[COND2]], label %[[T1:.*]], label %[[TY:.*]]
+; CHECK: [[T1]]:
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 [[A]]
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P2]], align 4
; CHECK-NEXT: [[COND:%.*]] = call i1 @cond2()
-; CHECK-NEXT: br i1 [[COND]], label [[TX:%.*]], label [[F]]
-; CHECK: F:
+; CHECK-NEXT: br i1 [[COND]], label %[[TX:.*]], label %[[F]]
+; CHECK: [[F]]:
; CHECK-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 2
; CHECK-NEXT: store i32 17, ptr [[P3]], align 4
; CHECK-NEXT: store i32 42, ptr [[P2]], align 4
-; CHECK-NEXT: br label [[F1]]
-; CHECK: TX:
+; CHECK-NEXT: br label %[[F1]]
+; CHECK: [[TX]]:
; CHECK-NEXT: ret i32 [[X]]
-; CHECK: TY:
+; CHECK: [[TY]]:
; CHECK-NEXT: ret i32 0
;
entry:
@@ -605,32 +1120,123 @@ TY:
}
define i32 @phi_trans3(ptr %p, i32 %x, i32 %y, i32 %z) {
-; CHECK-LABEL: @phi_trans3(
-; CHECK-NEXT: block1:
-; CHECK-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: br i1 [[CMPXY]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]]
-; CHECK: block2:
-; CHECK-NEXT: store i32 87, ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[BLOCK4:%.*]]
-; CHECK: block3:
-; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43
-; CHECK-NEXT: store i32 97, ptr [[P2]], align 4
-; CHECK-NEXT: br label [[BLOCK4]]
-; CHECK: block4:
-; CHECK-NEXT: [[D:%.*]] = phi i32 [ 87, [[BLOCK2]] ], [ 97, [[BLOCK3]] ]
-; CHECK-NEXT: br i1 [[CMPXY]], label [[BLOCK5:%.*]], label [[EXIT:%.*]]
-; CHECK: block5:
-; CHECK-NEXT: br i1 true, label [[BLOCK6:%.*]], label [[BLOCK5_EXIT_CRIT_EDGE:%.*]]
-; CHECK: block5.exit_crit_edge:
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: block6:
-; CHECK-NEXT: br i1 true, label [[BLOCK7:%.*]], label [[BLOCK6_EXIT_CRIT_EDGE:%.*]]
-; CHECK: block6.exit_crit_edge:
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: block7:
-; CHECK-NEXT: ret i32 [[D]]
-; CHECK: exit:
-; CHECK-NEXT: ret i32 -1
+; LE-MEMDEP-LABEL: define i32 @phi_trans3(
+; LE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; LE-MEMDEP-NEXT: [[BLOCK1:.*:]]
+; LE-MEMDEP-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]]
+; LE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; LE-MEMDEP: [[BLOCK2]]:
+; LE-MEMDEP-NEXT: store i32 87, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: br label %[[BLOCK4:.*]]
+; LE-MEMDEP: [[BLOCK3]]:
+; LE-MEMDEP-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43
+; LE-MEMDEP-NEXT: store i32 97, ptr [[P2]], align 4
+; LE-MEMDEP-NEXT: br label %[[BLOCK4]]
+; LE-MEMDEP: [[BLOCK4]]:
+; LE-MEMDEP-NEXT: [[D:%.*]] = phi i32 [ 87, %[[BLOCK2]] ], [ 97, %[[BLOCK3]] ]
+; LE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]]
+; LE-MEMDEP: [[BLOCK5]]:
+; LE-MEMDEP-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]]
+; LE-MEMDEP: [[BLOCK5_EXIT_CRIT_EDGE]]:
+; LE-MEMDEP-NEXT: br label %[[EXIT]]
+; LE-MEMDEP: [[BLOCK6]]:
+; LE-MEMDEP-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]]
+; LE-MEMDEP: [[BLOCK6_EXIT_CRIT_EDGE]]:
+; LE-MEMDEP-NEXT: br label %[[EXIT]]
+; LE-MEMDEP: [[BLOCK7]]:
+; LE-MEMDEP-NEXT: ret i32 [[D]]
+; LE-MEMDEP: [[EXIT]]:
+; LE-MEMDEP-NEXT: ret i32 -1
+;
+; LE-MEMSSA-LABEL: define i32 @phi_trans3(
+; LE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; LE-MEMSSA-NEXT: [[BLOCK1:.*:]]
+; LE-MEMSSA-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]]
+; LE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; LE-MEMSSA: [[BLOCK2]]:
+; LE-MEMSSA-NEXT: store i32 87, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: br label %[[BLOCK4:.*]]
+; LE-MEMSSA: [[BLOCK3]]:
+; LE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43
+; LE-MEMSSA-NEXT: store i32 97, ptr [[P2]], align 4
+; LE-MEMSSA-NEXT: br label %[[BLOCK4]]
+; LE-MEMSSA: [[BLOCK4]]:
+; LE-MEMSSA-NEXT: [[A:%.*]] = phi i32 [ -1, %[[BLOCK2]] ], [ 42, %[[BLOCK3]] ]
+; LE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]]
+; LE-MEMSSA: [[BLOCK5]]:
+; LE-MEMSSA-NEXT: [[B:%.*]] = add i32 [[A]], 1
+; LE-MEMSSA-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]]
+; LE-MEMSSA: [[BLOCK5_EXIT_CRIT_EDGE]]:
+; LE-MEMSSA-NEXT: br label %[[EXIT]]
+; LE-MEMSSA: [[BLOCK6]]:
+; LE-MEMSSA-NEXT: [[C:%.*]] = getelementptr i32, ptr [[P]], i32 [[B]]
+; LE-MEMSSA-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]]
+; LE-MEMSSA: [[BLOCK6_EXIT_CRIT_EDGE]]:
+; LE-MEMSSA-NEXT: br label %[[EXIT]]
+; LE-MEMSSA: [[BLOCK7]]:
+; LE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4
+; LE-MEMSSA-NEXT: ret i32 [[D]]
+; LE-MEMSSA: [[EXIT]]:
+; LE-MEMSSA-NEXT: ret i32 -1
+;
+; BE-MEMDEP-LABEL: define i32 @phi_trans3(
+; BE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; BE-MEMDEP-NEXT: [[BLOCK1:.*:]]
+; BE-MEMDEP-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]]
+; BE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; BE-MEMDEP: [[BLOCK2]]:
+; BE-MEMDEP-NEXT: store i32 87, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: br label %[[BLOCK4:.*]]
+; BE-MEMDEP: [[BLOCK3]]:
+; BE-MEMDEP-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43
+; BE-MEMDEP-NEXT: store i32 97, ptr [[P2]], align 4
+; BE-MEMDEP-NEXT: br label %[[BLOCK4]]
+; BE-MEMDEP: [[BLOCK4]]:
+; BE-MEMDEP-NEXT: [[D:%.*]] = phi i32 [ 87, %[[BLOCK2]] ], [ 97, %[[BLOCK3]] ]
+; BE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]]
+; BE-MEMDEP: [[BLOCK5]]:
+; BE-MEMDEP-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]]
+; BE-MEMDEP: [[BLOCK5_EXIT_CRIT_EDGE]]:
+; BE-MEMDEP-NEXT: br label %[[EXIT]]
+; BE-MEMDEP: [[BLOCK6]]:
+; BE-MEMDEP-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]]
+; BE-MEMDEP: [[BLOCK6_EXIT_CRIT_EDGE]]:
+; BE-MEMDEP-NEXT: br label %[[EXIT]]
+; BE-MEMDEP: [[BLOCK7]]:
+; BE-MEMDEP-NEXT: ret i32 [[D]]
+; BE-MEMDEP: [[EXIT]]:
+; BE-MEMDEP-NEXT: ret i32 -1
+;
+; BE-MEMSSA-LABEL: define i32 @phi_trans3(
+; BE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; BE-MEMSSA-NEXT: [[BLOCK1:.*:]]
+; BE-MEMSSA-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]]
+; BE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]]
+; BE-MEMSSA: [[BLOCK2]]:
+; BE-MEMSSA-NEXT: store i32 87, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: br label %[[BLOCK4:.*]]
+; BE-MEMSSA: [[BLOCK3]]:
+; BE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43
+; BE-MEMSSA-NEXT: store i32 97, ptr [[P2]], align 4
+; BE-MEMSSA-NEXT: br label %[[BLOCK4]]
+; BE-MEMSSA: [[BLOCK4]]:
+; BE-MEMSSA-NEXT: [[A:%.*]] = phi i32 [ -1, %[[BLOCK2]] ], [ 42, %[[BLOCK3]] ]
+; BE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]]
+; BE-MEMSSA: [[BLOCK5]]:
+; BE-MEMSSA-NEXT: [[B:%.*]] = add i32 [[A]], 1
+; BE-MEMSSA-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]]
+; BE-MEMSSA: [[BLOCK5_EXIT_CRIT_EDGE]]:
+; BE-MEMSSA-NEXT: br label %[[EXIT]]
+; BE-MEMSSA: [[BLOCK6]]:
+; BE-MEMSSA-NEXT: [[C:%.*]] = getelementptr i32, ptr [[P]], i32 [[B]]
+; BE-MEMSSA-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]]
+; BE-MEMSSA: [[BLOCK6_EXIT_CRIT_EDGE]]:
+; BE-MEMSSA-NEXT: br label %[[EXIT]]
+; BE-MEMSSA: [[BLOCK7]]:
+; BE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4
+; BE-MEMSSA-NEXT: ret i32 [[D]]
+; BE-MEMSSA: [[EXIT]]:
+; BE-MEMSSA-NEXT: ret i32 -1
;
block1:
%cmpxy = icmp eq i32 %x, %y
@@ -668,21 +1274,77 @@ exit:
}
define i8 @phi_trans4(ptr %p) {
-; CHECK-LABEL: @phi_trans4(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 192
-; CHECK-NEXT: store i8 -64, ptr [[X3]], align 1
-; CHECK-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
-; CHECK-NEXT: [[Y2_PRE:%.*]] = load i8, ptr [[X]], align 1
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[Y2:%.*]] = phi i8 [ [[Y2_PRE]], [[ENTRY:%.*]] ], [ 0, [[LOOP]] ]
-; CHECK-NEXT: [[COND:%.*]] = call i1 @cond2()
-; CHECK-NEXT: store i32 0, ptr [[X3]], align 4
-; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[OUT:%.*]]
-; CHECK: out:
-; CHECK-NEXT: [[R:%.*]] = add i8 [[Y2_PRE]], [[Y2]]
-; CHECK-NEXT: ret i8 [[R]]
+; LE-MEMDEP-LABEL: define i8 @phi_trans4(
+; LE-MEMDEP-SAME: ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; LE-MEMDEP-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192
+; LE-MEMDEP-NEXT: store i8 -64, ptr [[X3]], align 1
+; LE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; LE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; LE-MEMDEP-NEXT: br label %[[LOOP:.*]]
+; LE-MEMDEP: [[LOOP]]:
+; LE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ 0, %[[LOOP]] ]
+; LE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2()
+; LE-MEMDEP-NEXT: store i32 0, ptr [[X3]], align 4
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]]
+; LE-MEMDEP: [[OUT]]:
+; LE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; LE-MEMDEP-NEXT: ret i8 [[R]]
+;
+; LE-MEMSSA-LABEL: define i8 @phi_trans4(
+; LE-MEMSSA-SAME: ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; LE-MEMSSA-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192
+; LE-MEMSSA-NEXT: store i8 -64, ptr [[X3]], align 1
+; LE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; LE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; LE-MEMSSA-NEXT: br label %[[LOOP:.*]]
+; LE-MEMSSA: [[LOOP]]:
+; LE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 192, %[[LOOP]] ]
+; LE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
+; LE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1
+; LE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2()
+; LE-MEMSSA-NEXT: store i32 0, ptr [[X3]], align 4
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]]
+; LE-MEMSSA: [[OUT]]:
+; LE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; LE-MEMSSA-NEXT: ret i8 [[R]]
+;
+; BE-MEMDEP-LABEL: define i8 @phi_trans4(
+; BE-MEMDEP-SAME: ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; BE-MEMDEP-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192
+; BE-MEMDEP-NEXT: store i8 -64, ptr [[X3]], align 1
+; BE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; BE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; BE-MEMDEP-NEXT: br label %[[LOOP:.*]]
+; BE-MEMDEP: [[LOOP]]:
+; BE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ 0, %[[LOOP]] ]
+; BE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2()
+; BE-MEMDEP-NEXT: store i32 0, ptr [[X3]], align 4
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]]
+; BE-MEMDEP: [[OUT]]:
+; BE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; BE-MEMDEP-NEXT: ret i8 [[R]]
+;
+; BE-MEMSSA-LABEL: define i8 @phi_trans4(
+; BE-MEMSSA-SAME: ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; BE-MEMSSA-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192
+; BE-MEMSSA-NEXT: store i8 -64, ptr [[X3]], align 1
+; BE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; BE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; BE-MEMSSA-NEXT: br label %[[LOOP:.*]]
+; BE-MEMSSA: [[LOOP]]:
+; BE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 192, %[[LOOP]] ]
+; BE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
+; BE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1
+; BE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2()
+; BE-MEMSSA-NEXT: store i32 0, ptr [[X3]], align 4
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]]
+; BE-MEMSSA: [[OUT]]:
+; BE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; BE-MEMSSA-NEXT: ret i8 [[R]]
;
entry:
%X3 = getelementptr i8, ptr %p, i32 192
@@ -709,28 +1371,97 @@ out:
}
define i8 @phi_trans5(ptr %p) {
-; CHECK-LABEL: @phi_trans5(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; CHECK-NEXT: store i8 19, ptr [[X4]], align 1
-; CHECK-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
-; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], [[ENTRY:%.*]] ], [ [[Y2_PRE:%.*]], [[CONT:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ 3, [[CONT]] ]
-; CHECK-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
-; CHECK-NEXT: [[COND:%.*]] = call i1 @cond2()
-; CHECK-NEXT: br i1 [[COND]], label [[CONT]], label [[OUT:%.*]]
-; CHECK: cont:
-; CHECK-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1
-; CHECK-NEXT: store i32 50462976, ptr [[Z]], align 4
-; CHECK-NEXT: [[X2_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; CHECK-NEXT: [[Y2_PRE]] = load i8, ptr [[X2_PHI_TRANS_INSERT]], align 1
-; CHECK-NEXT: br label [[LOOP]]
-; CHECK: out:
-; CHECK-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
-; CHECK-NEXT: ret i8 [[R]]
+; LE-MEMDEP-LABEL: define i8 @phi_trans5(
+; LE-MEMDEP-SAME: ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; LE-MEMDEP-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-MEMDEP-NEXT: store i8 19, ptr [[X4]], align 1
+; LE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; LE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; LE-MEMDEP-NEXT: br label %[[LOOP:.*]]
+; LE-MEMDEP: [[LOOP]]:
+; LE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ [[Y2_PRE:%.*]], %[[CONT:.*]] ]
+; LE-MEMDEP-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT]] ]
+; LE-MEMDEP-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
+; LE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2()
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]]
+; LE-MEMDEP: [[CONT]]:
+; LE-MEMDEP-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1
+; LE-MEMDEP-NEXT: store i32 50462976, ptr [[Z]], align 4
+; LE-MEMDEP-NEXT: [[X2_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; LE-MEMDEP-NEXT: [[Y2_PRE]] = load i8, ptr [[X2_PHI_TRANS_INSERT]], align 1
+; LE-MEMDEP-NEXT: br label %[[LOOP]]
+; LE-MEMDEP: [[OUT]]:
+; LE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; LE-MEMDEP-NEXT: ret i8 [[R]]
+;
+; LE-MEMSSA-LABEL: define i8 @phi_trans5(
+; LE-MEMSSA-SAME: ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; LE-MEMSSA-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; LE-MEMSSA-NEXT: store i8 19, ptr [[X4]], align 1
+; LE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; LE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; LE-MEMSSA-NEXT: br label %[[LOOP:.*]]
+; LE-MEMSSA: [[LOOP]]:
+; LE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT:.*]] ]
+; LE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
+; LE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1
+; LE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2()
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]]
+; LE-MEMSSA: [[CONT]]:
+; LE-MEMSSA-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1
+; LE-MEMSSA-NEXT: store i32 50462976, ptr [[Z]], align 4
+; LE-MEMSSA-NEXT: br label %[[LOOP]]
+; LE-MEMSSA: [[OUT]]:
+; LE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; LE-MEMSSA-NEXT: ret i8 [[R]]
+;
+; BE-MEMDEP-LABEL: define i8 @phi_trans5(
+; BE-MEMDEP-SAME: ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; BE-MEMDEP-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-MEMDEP-NEXT: store i8 19, ptr [[X4]], align 1
+; BE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; BE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; BE-MEMDEP-NEXT: br label %[[LOOP:.*]]
+; BE-MEMDEP: [[LOOP]]:
+; BE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ [[Y2_PRE:%.*]], %[[CONT:.*]] ]
+; BE-MEMDEP-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT]] ]
+; BE-MEMDEP-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
+; BE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2()
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]]
+; BE-MEMDEP: [[CONT]]:
+; BE-MEMDEP-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1
+; BE-MEMDEP-NEXT: store i32 50462976, ptr [[Z]], align 4
+; BE-MEMDEP-NEXT: [[X2_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-MEMDEP-NEXT: [[Y2_PRE]] = load i8, ptr [[X2_PHI_TRANS_INSERT]], align 1
+; BE-MEMDEP-NEXT: br label %[[LOOP]]
+; BE-MEMDEP: [[OUT]]:
+; BE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; BE-MEMDEP-NEXT: ret i8 [[R]]
+;
+; BE-MEMSSA-LABEL: define i8 @phi_trans5(
+; BE-MEMSSA-SAME: ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; BE-MEMSSA-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-MEMSSA-NEXT: store i8 19, ptr [[X4]], align 1
+; BE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4
+; BE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1
+; BE-MEMSSA-NEXT: br label %[[LOOP:.*]]
+; BE-MEMSSA: [[LOOP]]:
+; BE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT:.*]] ]
+; BE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]]
+; BE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1
+; BE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2()
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]]
+; BE-MEMSSA: [[CONT]]:
+; BE-MEMSSA-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1
+; BE-MEMSSA-NEXT: store i32 50462976, ptr [[Z]], align 4
+; BE-MEMSSA-NEXT: br label %[[LOOP]]
+; BE-MEMSSA: [[OUT]]:
+; BE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]]
+; BE-MEMSSA-NEXT: ret i8 [[R]]
;
entry:
@@ -766,24 +1497,79 @@ declare void @use_i32(i32) readonly
; into header. Make sure we translate the address for %l1 correctly where
; parts of the address computations are in different basic blocks.
define i32 @phi_trans6(ptr noalias nocapture readonly %x, i1 %cond) {
-; CHECK-LABEL: @phi_trans6(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[X:%.*]], align 4
-; CHECK-NEXT: call void @use_i32(i32 [[L0]])
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[L1_PRE:%.*]] = phi i32 [ [[L0]], [[ENTRY:%.*]] ], [ [[L1_PRE1:%.*]], [[LATCH_HEADER_CRIT_EDGE:%.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LATCH_HEADER_CRIT_EDGE]] ]
-; CHECK-NEXT: indirectbr ptr blockaddress(@phi_trans6, [[LATCH:%.*]]), [label %latch]
-; CHECK: latch:
-; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[LATCH_HEADER_CRIT_EDGE]]
-; CHECK: latch.header_crit_edge:
-; CHECK-NEXT: [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV_NEXT]]
-; CHECK-NEXT: [[L1_PRE1]] = load i32, ptr [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT]], align 4
-; CHECK-NEXT: br label [[HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: ret i32 [[L1_PRE]]
+; LE-MEMDEP-LABEL: define i32 @phi_trans6(
+; LE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; LE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]])
+; LE-MEMDEP-NEXT: br label %[[HEADER:.*]]
+; LE-MEMDEP: [[HEADER]]:
+; LE-MEMDEP-NEXT: [[L1:%.*]] = phi i32 [ [[L0]], %[[ENTRY]] ], [ [[L1_PRE:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ]
+; LE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE]] ]
+; LE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH:.*]]), [label %latch]
+; LE-MEMDEP: [[LATCH]]:
+; LE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]]
+; LE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]:
+; LE-MEMDEP-NEXT: [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV_NEXT]]
+; LE-MEMDEP-NEXT: [[L1_PRE]] = load i32, ptr [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT]], align 4
+; LE-MEMDEP-NEXT: br label %[[HEADER]]
+; LE-MEMDEP: [[EXIT]]:
+; LE-MEMDEP-NEXT: ret i32 [[L1]]
+;
+; LE-MEMSSA-LABEL: define i32 @phi_trans6(
+; LE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; LE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]])
+; LE-MEMSSA-NEXT: br label %[[HEADER:.*]]
+; LE-MEMSSA: [[HEADER]]:
+; LE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; LE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH]]), [label %latch]
+; LE-MEMSSA: [[LATCH]]:
+; LE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV]]
+; LE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; LE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]]
+; LE-MEMSSA: [[EXIT]]:
+; LE-MEMSSA-NEXT: ret i32 [[L1]]
+;
+; BE-MEMDEP-LABEL: define i32 @phi_trans6(
+; BE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; BE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]])
+; BE-MEMDEP-NEXT: br label %[[HEADER:.*]]
+; BE-MEMDEP: [[HEADER]]:
+; BE-MEMDEP-NEXT: [[L1:%.*]] = phi i32 [ [[L0]], %[[ENTRY]] ], [ [[L1_PRE:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ]
+; BE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE]] ]
+; BE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH:.*]]), [label %latch]
+; BE-MEMDEP: [[LATCH]]:
+; BE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]]
+; BE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]:
+; BE-MEMDEP-NEXT: [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV_NEXT]]
+; BE-MEMDEP-NEXT: [[L1_PRE]] = load i32, ptr [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT]], align 4
+; BE-MEMDEP-NEXT: br label %[[HEADER]]
+; BE-MEMDEP: [[EXIT]]:
+; BE-MEMDEP-NEXT: ret i32 [[L1]]
+;
+; BE-MEMSSA-LABEL: define i32 @phi_trans6(
+; BE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; BE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]])
+; BE-MEMSSA-NEXT: br label %[[HEADER:.*]]
+; BE-MEMSSA: [[HEADER]]:
+; BE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; BE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH]]), [label %latch]
+; BE-MEMSSA: [[LATCH]]:
+; BE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV]]
+; BE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; BE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]]
+; BE-MEMSSA: [[EXIT]]:
+; BE-MEMSSA-NEXT: ret i32 [[L1]]
;
entry:
%l0 = load i32, ptr %x
@@ -806,24 +1592,81 @@ exit:
; FIXME: Currently we fail to translate the PHI in this case.
define i32 @phi_trans7(ptr noalias nocapture readonly %x, i1 %cond) {
-; CHECK-LABEL: @phi_trans7(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[X:%.*]], align 4
-; CHECK-NEXT: call void @use_i32(i32 [[L0]])
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 2, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH_HEADER_CRIT_EDGE:%.*]] ]
-; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
-; CHECK-NEXT: indirectbr ptr blockaddress(@phi_trans7, [[LATCH:%.*]]), [label %latch]
-; CHECK: latch:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[LATCH_HEADER_CRIT_EDGE]]
-; CHECK: latch.header_crit_edge:
-; CHECK-NEXT: br label [[HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: ret i32 [[L1]]
+; LE-MEMDEP-LABEL: define i32 @phi_trans7(
+; LE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; LE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]])
+; LE-MEMDEP-NEXT: br label %[[HEADER:.*]]
+; LE-MEMDEP: [[HEADER]]:
+; LE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ]
+; LE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; LE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH:.*]]), [label %latch]
+; LE-MEMDEP: [[LATCH]]:
+; LE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; LE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; LE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]]
+; LE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]:
+; LE-MEMDEP-NEXT: br label %[[HEADER]]
+; LE-MEMDEP: [[EXIT]]:
+; LE-MEMDEP-NEXT: ret i32 [[L1]]
+;
+; LE-MEMSSA-LABEL: define i32 @phi_trans7(
+; LE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; LE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]])
+; LE-MEMSSA-NEXT: br label %[[HEADER:.*]]
+; LE-MEMSSA: [[HEADER]]:
+; LE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; LE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; LE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH]]), [label %latch]
+; LE-MEMSSA: [[LATCH]]:
+; LE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; LE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; LE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]]
+; LE-MEMSSA: [[EXIT]]:
+; LE-MEMSSA-NEXT: ret i32 [[L1]]
+;
+; BE-MEMDEP-LABEL: define i32 @phi_trans7(
+; BE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; BE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]])
+; BE-MEMDEP-NEXT: br label %[[HEADER:.*]]
+; BE-MEMDEP: [[HEADER]]:
+; BE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ]
+; BE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; BE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH:.*]]), [label %latch]
+; BE-MEMDEP: [[LATCH]]:
+; BE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; BE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; BE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]]
+; BE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]:
+; BE-MEMDEP-NEXT: br label %[[HEADER]]
+; BE-MEMDEP: [[EXIT]]:
+; BE-MEMDEP-NEXT: ret i32 [[L1]]
+;
+; BE-MEMSSA-LABEL: define i32 @phi_trans7(
+; BE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; BE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]])
+; BE-MEMSSA-NEXT: br label %[[HEADER:.*]]
+; BE-MEMSSA: [[HEADER]]:
+; BE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; BE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; BE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH]]), [label %latch]
+; BE-MEMSSA: [[LATCH]]:
+; BE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; BE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; BE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]]
+; BE-MEMSSA: [[EXIT]]:
+; BE-MEMSSA-NEXT: ret i32 [[L1]]
;
entry:
%l0 = load i32, ptr %x
@@ -847,24 +1690,81 @@ exit:
; FIXME: Currently we fail to translate the PHI in this case.
define i32 @phi_trans8(ptr noalias nocapture readonly %x, i1 %cond) {
-; CHECK-LABEL: @phi_trans8(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[X:%.*]], align 4
-; CHECK-NEXT: call void @use_i32(i32 [[L0]])
-; CHECK-NEXT: br label [[HEADER:%.*]]
-; CHECK: header:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 2, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH_HEADER_CRIT_EDGE:%.*]] ]
-; CHECK-NEXT: indirectbr ptr blockaddress(@phi_trans8, [[LATCH:%.*]]), [label %latch]
-; CHECK: latch:
-; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
-; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[LATCH_HEADER_CRIT_EDGE]]
-; CHECK: latch.header_crit_edge:
-; CHECK-NEXT: br label [[HEADER]]
-; CHECK: exit:
-; CHECK-NEXT: ret i32 [[L1]]
+; LE-MEMDEP-LABEL: define i32 @phi_trans8(
+; LE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; LE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]])
+; LE-MEMDEP-NEXT: br label %[[HEADER:.*]]
+; LE-MEMDEP: [[HEADER]]:
+; LE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ]
+; LE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH:.*]]), [label %latch]
+; LE-MEMDEP: [[LATCH]]:
+; LE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; LE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; LE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; LE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]]
+; LE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]:
+; LE-MEMDEP-NEXT: br label %[[HEADER]]
+; LE-MEMDEP: [[EXIT]]:
+; LE-MEMDEP-NEXT: ret i32 [[L1]]
+;
+; LE-MEMSSA-LABEL: define i32 @phi_trans8(
+; LE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; LE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; LE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]])
+; LE-MEMSSA-NEXT: br label %[[HEADER:.*]]
+; LE-MEMSSA: [[HEADER]]:
+; LE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; LE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH]]), [label %latch]
+; LE-MEMSSA: [[LATCH]]:
+; LE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; LE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; LE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; LE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]]
+; LE-MEMSSA: [[EXIT]]:
+; LE-MEMSSA-NEXT: ret i32 [[L1]]
+;
+; BE-MEMDEP-LABEL: define i32 @phi_trans8(
+; BE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; BE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]])
+; BE-MEMDEP-NEXT: br label %[[HEADER:.*]]
+; BE-MEMDEP: [[HEADER]]:
+; BE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ]
+; BE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH:.*]]), [label %latch]
+; BE-MEMDEP: [[LATCH]]:
+; BE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; BE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; BE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; BE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]]
+; BE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]:
+; BE-MEMDEP-NEXT: br label %[[HEADER]]
+; BE-MEMDEP: [[EXIT]]:
+; BE-MEMDEP-NEXT: ret i32 [[L1]]
+;
+; BE-MEMSSA-LABEL: define i32 @phi_trans8(
+; BE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) {
+; BE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; BE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]])
+; BE-MEMSSA-NEXT: br label %[[HEADER:.*]]
+; BE-MEMSSA: [[HEADER]]:
+; BE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; BE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH]]), [label %latch]
+; BE-MEMSSA: [[LATCH]]:
+; BE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2
+; BE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]]
+; BE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4
+; BE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]]
+; BE-MEMSSA: [[EXIT]]:
+; BE-MEMSSA-NEXT: ret i32 [[L1]]
;
entry:
%l0 = load i32, ptr %x
@@ -890,11 +1790,35 @@ exit:
; PR6642
define i32 @memset_to_load() nounwind readnone {
-; CHECK-LABEL: @memset_to_load(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[X:%.*]] = alloca [256 x i32], align 4
-; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false)
-; CHECK-NEXT: ret i32 0
+; LE-MEMDEP-LABEL: define i32 @memset_to_load(
+; LE-MEMDEP-SAME: ) #[[ATTR2:[0-9]+]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: [[X:%.*]] = alloca [256 x i32], align 4
+; LE-MEMDEP-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false)
+; LE-MEMDEP-NEXT: ret i32 0
+;
+; LE-MEMSSA-LABEL: define i32 @memset_to_load(
+; LE-MEMSSA-SAME: ) #[[ATTR2:[0-9]+]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: [[X:%.*]] = alloca [256 x i32], align 4
+; LE-MEMSSA-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false)
+; LE-MEMSSA-NEXT: [[TTMP1:%.*]] = load i32, ptr [[X]], align 4
+; LE-MEMSSA-NEXT: ret i32 [[TTMP1]]
+;
+; BE-MEMDEP-LABEL: define i32 @memset_to_load(
+; BE-MEMDEP-SAME: ) #[[ATTR2:[0-9]+]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: [[X:%.*]] = alloca [256 x i32], align 4
+; BE-MEMDEP-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false)
+; BE-MEMDEP-NEXT: ret i32 0
+;
+; BE-MEMSSA-LABEL: define i32 @memset_to_load(
+; BE-MEMSSA-SAME: ) #[[ATTR2:[0-9]+]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: [[X:%.*]] = alloca [256 x i32], align 4
+; BE-MEMSSA-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false)
+; BE-MEMSSA-NEXT: [[TTMP1:%.*]] = load i32, ptr [[X]], align 4
+; BE-MEMSSA-NEXT: ret i32 [[TTMP1]]
;
entry:
%x = alloca [256 x i32], align 4 ; <ptr> [#uses=2]
@@ -910,23 +1834,45 @@ entry:
;;===----------------------------------------------------------------------===;;
define i32 @load_load_partial_alias(ptr %P) nounwind ssp {
-; LE-LABEL: @load_load_partial_alias(
-; LE-NEXT: entry:
-; LE-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P:%.*]], align 4
-; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 8
-; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; LE-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-; LE-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
-; LE-NEXT: ret i32 [[ADD]]
-;
-; BE-LABEL: @load_load_partial_alias(
-; BE-NEXT: entry:
-; BE-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P:%.*]], align 4
-; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 16
-; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
-; BE-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
-; BE-NEXT: ret i32 [[ADD]]
+; LE-MEMDEP-LABEL: define i32 @load_load_partial_alias(
+; LE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 8
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; LE-MEMDEP-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+; LE-MEMDEP-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
+; LE-MEMDEP-NEXT: ret i32 [[ADD]]
+;
+; LE-MEMSSA-LABEL: define i32 @load_load_partial_alias(
+; LE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
+; LE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ADD_PTR]], align 1
+; LE-MEMSSA-NEXT: [[CONV:%.*]] = zext i8 [[TTMP5]] to i32
+; LE-MEMSSA-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
+; LE-MEMSSA-NEXT: ret i32 [[ADD]]
+;
+; BE-MEMDEP-LABEL: define i32 @load_load_partial_alias(
+; BE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 16
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; BE-MEMDEP-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32
+; BE-MEMDEP-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
+; BE-MEMDEP-NEXT: ret i32 [[ADD]]
+;
+; BE-MEMSSA-LABEL: define i32 @load_load_partial_alias(
+; BE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
+; BE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ADD_PTR]], align 1
+; BE-MEMSSA-NEXT: [[CONV:%.*]] = zext i8 [[TTMP5]] to i32
+; BE-MEMSSA-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]]
+; BE-MEMSSA-NEXT: ret i32 [[ADD]]
;
entry:
%ttmp2 = load i32, ptr %P
@@ -940,31 +1886,61 @@ entry:
; Cross block partial alias case.
define i32 @load_load_partial_alias_cross_block(ptr %P) nounwind ssp {
-; LE-LABEL: @load_load_partial_alias_cross_block(
-; LE-NEXT: entry:
-; LE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4
-; LE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
-; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; LE-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]]
-; LE: land.lhs.true:
-; LE-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32
-; LE-NEXT: ret i32 [[CONV6]]
-; LE: if.end:
-; LE-NEXT: ret i32 52
-;
-; BE-LABEL: @load_load_partial_alias_cross_block(
-; BE-NEXT: entry:
-; BE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4
-; BE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
-; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; BE-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]]
-; BE: land.lhs.true:
-; BE-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32
-; BE-NEXT: ret i32 [[CONV6]]
-; BE: if.end:
-; BE-NEXT: ret i32 52
+; LE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block(
+; LE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]]
+; LE-MEMDEP: [[LAND_LHS_TRUE]]:
+; LE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32
+; LE-MEMDEP-NEXT: ret i32 [[CONV6]]
+; LE-MEMDEP: [[IF_END]]:
+; LE-MEMDEP-NEXT: ret i32 52
+;
+; LE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block(
+; LE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]]
+; LE-MEMSSA: [[LAND_LHS_TRUE]]:
+; LE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
+; LE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+; LE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
+; LE-MEMSSA-NEXT: ret i32 [[CONV6]]
+; LE-MEMSSA: [[IF_END]]:
+; LE-MEMSSA-NEXT: ret i32 52
+;
+; BE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block(
+; BE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]]
+; BE-MEMDEP: [[LAND_LHS_TRUE]]:
+; BE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32
+; BE-MEMDEP-NEXT: ret i32 [[CONV6]]
+; BE-MEMDEP: [[IF_END]]:
+; BE-MEMDEP-NEXT: ret i32 52
+;
+; BE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block(
+; BE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]]
+; BE-MEMSSA: [[LAND_LHS_TRUE]]:
+; BE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1
+; BE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+; BE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
+; BE-MEMSSA-NEXT: ret i32 [[CONV6]]
+; BE-MEMSSA: [[IF_END]]:
+; BE-MEMSSA-NEXT: ret i32 52
;
entry:
%x1 = load i32, ptr %P, align 4
@@ -982,45 +1958,85 @@ if.end:
}
define i32 @load_load_partial_alias_cross_block_phi_trans(ptr %P) nounwind {
-; LE-LABEL: @load_load_partial_alias_cross_block_phi_trans(
-; LE-NEXT: entry:
-; LE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4
-; LE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
-; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; LE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8
-; LE-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; LE-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
-; LE: if:
-; LE-NEXT: br label [[JOIN:%.*]]
-; LE: else:
-; LE-NEXT: br label [[JOIN]]
-; LE: join:
-; LE-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
-; LE-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
-; LE-NEXT: ret i32 [[CONV6]]
-; LE: if.end:
-; LE-NEXT: ret i32 52
-;
-; BE-LABEL: @load_load_partial_alias_cross_block_phi_trans(
-; BE-NEXT: entry:
-; BE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4
-; BE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
-; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16
-; BE-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
-; BE-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
-; BE: if:
-; BE-NEXT: br label [[JOIN:%.*]]
-; BE: else:
-; BE-NEXT: br label [[JOIN]]
-; BE: join:
-; BE-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
-; BE-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
-; BE-NEXT: ret i32 [[CONV6]]
-; BE: if.end:
-; BE-NEXT: ret i32 52
+; LE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans(
+; LE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR3]] {
+; LE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; LE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; LE-MEMDEP-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8
+; LE-MEMDEP-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]]
+; LE-MEMDEP: [[IF]]:
+; LE-MEMDEP-NEXT: br label %[[JOIN:.*]]
+; LE-MEMDEP: [[ELSE]]:
+; LE-MEMDEP-NEXT: br label %[[JOIN]]
+; LE-MEMDEP: [[JOIN]]:
+; LE-MEMDEP-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], %[[IF]] ], [ [[TMP1]], %[[ELSE]] ]
+; LE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
+; LE-MEMDEP-NEXT: ret i32 [[CONV6]]
+; LE-MEMDEP: [[IF_END:.*:]]
+; LE-MEMDEP-NEXT: ret i32 52
+;
+; LE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans(
+; LE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR3]] {
+; LE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; LE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]]
+; LE-MEMSSA: [[IF]]:
+; LE-MEMSSA-NEXT: br label %[[JOIN:.*]]
+; LE-MEMSSA: [[ELSE]]:
+; LE-MEMSSA-NEXT: br label %[[JOIN]]
+; LE-MEMSSA: [[JOIN]]:
+; LE-MEMSSA-NEXT: [[IDX:%.*]] = phi i64 [ 1, %[[IF]] ], [ 2, %[[ELSE]] ]
+; LE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; LE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+; LE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
+; LE-MEMSSA-NEXT: ret i32 [[CONV6]]
+; LE-MEMSSA: [[IF_END:.*:]]
+; LE-MEMSSA-NEXT: ret i32 52
+;
+; BE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans(
+; BE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR3]] {
+; BE-MEMDEP-NEXT: [[ENTRY:.*:]]
+; BE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; BE-MEMDEP-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]]
+; BE-MEMDEP: [[IF]]:
+; BE-MEMDEP-NEXT: br label %[[JOIN:.*]]
+; BE-MEMDEP: [[ELSE]]:
+; BE-MEMDEP-NEXT: br label %[[JOIN]]
+; BE-MEMDEP: [[JOIN]]:
+; BE-MEMDEP-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], %[[IF]] ], [ [[TMP1]], %[[ELSE]] ]
+; BE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
+; BE-MEMDEP-NEXT: ret i32 [[CONV6]]
+; BE-MEMDEP: [[IF_END:.*:]]
+; BE-MEMDEP-NEXT: ret i32 52
+;
+; BE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans(
+; BE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR3]] {
+; BE-MEMSSA-NEXT: [[ENTRY:.*:]]
+; BE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4
+; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
+; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]]
+; BE-MEMSSA: [[IF]]:
+; BE-MEMSSA-NEXT: br label %[[JOIN:.*]]
+; BE-MEMSSA: [[ELSE]]:
+; BE-MEMSSA-NEXT: br label %[[JOIN]]
+; BE-MEMSSA: [[JOIN]]:
+; BE-MEMSSA-NEXT: [[IDX:%.*]] = phi i64 [ 1, %[[IF]] ], [ 2, %[[ELSE]] ]
+; BE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]]
+; BE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+; BE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
+; BE-MEMSSA-NEXT: ret i32 [[CONV6]]
+; BE-MEMSSA: [[IF_END:.*:]]
+; BE-MEMSSA-NEXT: ret i32 52
;
entry:
%x1 = load i32, ptr %P, align 4
@@ -1047,58 +2063,104 @@ if.end:
}
define void @load_load_partial_alias_loop(ptr %P) {
-; LE-LABEL: @load_load_partial_alias_loop(
-; LE-NEXT: entry:
-; LE-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; LE-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1
-; LE-NEXT: call void @use.i8(i8 [[V_1]])
-; LE-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4
-; LE-NEXT: call void @use.i32(i32 [[V_1_32]])
-; LE-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8
-; LE-NEXT: br label [[LOOP:%.*]]
-; LE: loop:
-; LE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
-; LE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
-; LE-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
-; LE-NEXT: call void @use.i8(i8 [[V_I]])
-; LE-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4
-; LE-NEXT: call void @use.i32(i32 [[V_I_32]])
-; LE-NEXT: [[I_INC]] = add i64 [[I]], 1
-; LE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
-; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8
-; LE-NEXT: [[TMP2]] = trunc i32 [[TMP1]] to i8
-; LE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
-; LE: loop.loop_crit_edge:
-; LE-NEXT: br label [[LOOP]]
-; LE: exit:
-; LE-NEXT: ret void
-;
-; BE-LABEL: @load_load_partial_alias_loop(
-; BE-NEXT: entry:
-; BE-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
-; BE-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1
-; BE-NEXT: call void @use.i8(i8 [[V_1]])
-; BE-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4
-; BE-NEXT: call void @use.i32(i32 [[V_1_32]])
-; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24
-; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; BE-NEXT: br label [[LOOP:%.*]]
-; BE: loop:
-; BE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
-; BE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
-; BE-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
-; BE-NEXT: call void @use.i8(i8 [[V_I]])
-; BE-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4
-; BE-NEXT: call void @use.i32(i32 [[V_I_32]])
-; BE-NEXT: [[I_INC]] = add i64 [[I]], 1
-; BE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
-; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16
-; BE-NEXT: [[TMP3]] = trunc i32 [[TMP2]] to i8
-; BE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
-; BE: loop.loop_crit_edge:
-; BE-NEXT: br label [[LOOP]]
-; BE: exit:
-; BE-NEXT: ret void
+; LE-MEMDEP-LABEL: define void @load_load_partial_alias_loop(
+; LE-MEMDEP-SAME: ptr [[P:%.*]]) {
+; LE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; LE-MEMDEP-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; LE-MEMDEP-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1
+; LE-MEMDEP-NEXT: call void @use.i8(i8 [[V_1]])
+; LE-MEMDEP-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4
+; LE-MEMDEP-NEXT: call void @use.i32(i32 [[V_1_32]])
+; LE-MEMDEP-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8
+; LE-MEMDEP-NEXT: br label %[[LOOP:.*]]
+; LE-MEMDEP: [[LOOP]]:
+; LE-MEMDEP-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[LOOP_LOOP_CRIT_EDGE:.*]] ]
+; LE-MEMDEP-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_LOOP_CRIT_EDGE]] ]
+; LE-MEMDEP-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
+; LE-MEMDEP-NEXT: call void @use.i8(i8 [[V_I]])
+; LE-MEMDEP-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4
+; LE-MEMDEP-NEXT: call void @use.i32(i32 [[V_I_32]])
+; LE-MEMDEP-NEXT: [[I_INC]] = add i64 [[I]], 1
+; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8
+; LE-MEMDEP-NEXT: [[TMP2]] = trunc i32 [[TMP1]] to i8
+; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LOOP_LOOP_CRIT_EDGE]], label %[[EXIT:.*]]
+; LE-MEMDEP: [[LOOP_LOOP_CRIT_EDGE]]:
+; LE-MEMDEP-NEXT: br label %[[LOOP]]
+; LE-MEMDEP: [[EXIT]]:
+; LE-MEMDEP-NEXT: ret void
+;
+; LE-MEMSSA-LABEL: define void @load_load_partial_alias_loop(
+; LE-MEMSSA-SAME: ptr [[P:%.*]]) {
+; LE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; LE-MEMSSA-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; LE-MEMSSA-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1
+; LE-MEMSSA-NEXT: call void @use.i8(i8 [[V_1]])
+; LE-MEMSSA-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4
+; LE-MEMSSA-NEXT: call void @use.i32(i32 [[V_1_32]])
+; LE-MEMSSA-NEXT: br label %[[LOOP:.*]]
+; LE-MEMSSA: [[LOOP]]:
+; LE-MEMSSA-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP]] ]
+; LE-MEMSSA-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
+; LE-MEMSSA-NEXT: [[V_I:%.*]] = load i8, ptr [[P_I]], align 1
+; LE-MEMSSA-NEXT: call void @use.i8(i8 [[V_I]])
+; LE-MEMSSA-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4
+; LE-MEMSSA-NEXT: call void @use.i32(i32 [[V_I_32]])
+; LE-MEMSSA-NEXT: [[I_INC]] = add i64 [[I]], 1
+; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
+; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; LE-MEMSSA: [[EXIT]]:
+; LE-MEMSSA-NEXT: ret void
+;
+; BE-MEMDEP-LABEL: define void @load_load_partial_alias_loop(
+; BE-MEMDEP-SAME: ptr [[P:%.*]]) {
+; BE-MEMDEP-NEXT: [[ENTRY:.*]]:
+; BE-MEMDEP-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; BE-MEMDEP-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1
+; BE-MEMDEP-NEXT: call void @use.i8(i8 [[V_1]])
+; BE-MEMDEP-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4
+; BE-MEMDEP-NEXT: call void @use.i32(i32 [[V_1_32]])
+; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; BE-MEMDEP-NEXT: br label %[[LOOP:.*]]
+; BE-MEMDEP: [[LOOP]]:
+; BE-MEMDEP-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[LOOP_LOOP_CRIT_EDGE:.*]] ]
+; BE-MEMDEP-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_LOOP_CRIT_EDGE]] ]
+; BE-MEMDEP-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
+; BE-MEMDEP-NEXT: call void @use.i8(i8 [[V_I]])
+; BE-MEMDEP-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4
+; BE-MEMDEP-NEXT: call void @use.i32(i32 [[V_I_32]])
+; BE-MEMDEP-NEXT: [[I_INC]] = add i64 [[I]], 1
+; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
+; BE-MEMDEP-NEXT: [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16
+; BE-MEMDEP-NEXT: [[TMP3]] = trunc i32 [[TMP2]] to i8
+; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LOOP_LOOP_CRIT_EDGE]], label %[[EXIT:.*]]
+; BE-MEMDEP: [[LOOP_LOOP_CRIT_EDGE]]:
+; BE-MEMDEP-NEXT: br label %[[LOOP]]
+; BE-MEMDEP: [[EXIT]]:
+; BE-MEMDEP-NEXT: ret void
+;
+; BE-MEMSSA-LABEL: define void @load_load_partial_alias_loop(
+; BE-MEMSSA-SAME: ptr [[P:%.*]]) {
+; BE-MEMSSA-NEXT: [[ENTRY:.*]]:
+; BE-MEMSSA-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
+; BE-MEMSSA-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1
+; BE-MEMSSA-NEXT: call void @use.i8(i8 [[V_1]])
+; BE-MEMSSA-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4
+; BE-MEMSSA-NEXT: call void @use.i32(i32 [[V_1_32]])
+; BE-MEMSSA-NEXT: br label %[[LOOP:.*]]
+; BE-MEMSSA: [[LOOP]]:
+; BE-MEMSSA-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP]] ]
+; BE-MEMSSA-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
+; BE-MEMSSA-NEXT: [[V_I:%.*]] = load i8, ptr [[P_I]], align 1
+; BE-MEMSSA-NEXT: call void @use.i8(i8 [[V_I]])
+; BE-MEMSSA-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4
+; BE-MEMSSA-NEXT: call void @use.i32(i32 [[V_I_32]])
+; BE-MEMSSA-NEXT: [[I_INC]] = add i64 [[I]], 1
+; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
+; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; BE-MEMSSA: [[EXIT]]:
+; BE-MEMSSA-NEXT: ret void
;
entry:
%P.1 = getelementptr i8, ptr %P, i64 1
@@ -1129,37 +2191,63 @@ declare void @use.i32(i32) readnone
@global = external local_unnamed_addr global i8, align 4
define void @load_load_partial_alias_atomic(ptr %arg) {
-; LE-LABEL: @load_load_partial_alias_atomic(
-; LE-NEXT: bb:
-; LE-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
-; LE-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4
-; LE-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
-; LE-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 8
-; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
-; LE-NEXT: br label [[BB5:%.*]]
-; LE: bb5:
-; LE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP1]], [[BB:%.*]] ]
-; LE-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4
-; LE-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]]
-; LE-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1
-; LE-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4
-; LE-NEXT: br label [[BB5]]
-;
-; BE-LABEL: @load_load_partial_alias_atomic(
-; BE-NEXT: bb:
-; BE-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1
-; BE-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4
-; BE-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
-; BE-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 48
-; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
-; BE-NEXT: br label [[BB5:%.*]]
-; BE: bb5:
-; BE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP1]], [[BB:%.*]] ]
-; BE-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4
-; BE-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]]
-; BE-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1
-; BE-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4
-; BE-NEXT: br label [[BB5]]
+; LE-MEMDEP-LABEL: define void @load_load_partial_alias_atomic(
+; LE-MEMDEP-SAME: ptr [[ARG:%.*]]) {
+; LE-MEMDEP-NEXT: [[BB:.*]]:
+; LE-MEMDEP-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; LE-MEMDEP-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4
+; LE-MEMDEP-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 8
+; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
+; LE-MEMDEP-NEXT: br label %[[BB5:.*]]
+; LE-MEMDEP: [[BB5]]:
+; LE-MEMDEP-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], %[[BB5]] ], [ [[TMP1]], %[[BB]] ]
+; LE-MEMDEP-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4
+; LE-MEMDEP-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]]
+; LE-MEMDEP-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1
+; LE-MEMDEP-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4
+; LE-MEMDEP-NEXT: br label %[[BB5]]
+;
+; LE-MEMSSA-LABEL: define void @load_load_partial_alias_atomic(
+; LE-MEMSSA-SAME: ptr [[ARG:%.*]]) {
+; LE-MEMSSA-NEXT: [[BB:.*:]]
+; LE-MEMSSA-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; LE-MEMSSA-NEXT: br label %[[BB5:.*]]
+; LE-MEMSSA: [[BB5]]:
+; LE-MEMSSA-NEXT: [[TMP4_1:%.*]] = load i8, ptr [[TMP3_1]], align 4
+; LE-MEMSSA-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4
+; LE-MEMSSA-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]]
+; LE-MEMSSA-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1
+; LE-MEMSSA-NEXT: br label %[[BB5]]
+;
+; BE-MEMDEP-LABEL: define void @load_load_partial_alias_atomic(
+; BE-MEMDEP-SAME: ptr [[ARG:%.*]]) {
+; BE-MEMDEP-NEXT: [[BB:.*]]:
+; BE-MEMDEP-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1
+; BE-MEMDEP-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4
+; BE-MEMDEP-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 48
+; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
+; BE-MEMDEP-NEXT: br label %[[BB5:.*]]
+; BE-MEMDEP: [[BB5]]:
+; BE-MEMDEP-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], %[[BB5]] ], [ [[TMP1]], %[[BB]] ]
+; BE-MEMDEP-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4
+; BE-MEMDEP-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]]
+; BE-MEMDEP-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1
+; BE-MEMDEP-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4
+; BE-MEMDEP-NEXT: br label %[[BB5]]
+;
+; BE-MEMSSA-LABEL: define void @load_load_partial_alias_atomic(
+; BE-MEMSSA-SAME: ptr [[ARG:%.*]]) {
+; BE-MEMSSA-NEXT: [[BB:.*:]]
+; BE-MEMSSA-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2
+; BE-MEMSSA-NEXT: br label %[[BB5:.*]]
+; BE-MEMSSA: [[BB5]]:
+; BE-MEMSSA-NEXT: [[TMP4_1:%.*]] = load i8, ptr [[TMP3_1]], align 4
+; BE-MEMSSA-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4
+; BE-MEMSSA-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]]
+; BE-MEMSSA-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1
+; BE-MEMSSA-NEXT: br label %[[BB5]]
;
bb:
%tmp2.1 = getelementptr inbounds i8, ptr %arg, i64 1
@@ -1188,8 +2276,9 @@ bb5: ; preds = %bb14, %bb
@f = global %widening1 zeroinitializer, align 4
define i32 @test_widening1(ptr %P) nounwind ssp noredzone {
-; CHECK-LABEL: @test_widening1(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test_widening1(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TTMP:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1:%.*]], ptr @f, i64 0, i32 1), align 4
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TTMP]] to i32
; CHECK-NEXT: [[TTMP1:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1]], ptr @f, i64 0, i32 2), align 1
@@ -1207,8 +2296,9 @@ entry:
}
define i32 @test_widening2() nounwind ssp noredzone {
-; CHECK-LABEL: @test_widening2(
-; CHECK-NEXT: entry:
+; CHECK-LABEL: define i32 @test_widening2(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TTMP:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1:%.*]], ptr @f, i64 0, i32 1), align 4
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TTMP]] to i32
; CHECK-NEXT: [[TTMP1:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1]], ptr @f, i64 0, i32 2), align 1
@@ -1262,12 +2352,39 @@ declare void @use3(ptr, ptr)
; PR8908
define void @test_escape1() nounwind {
-; CHECK-LABEL: @test_escape1(
-; CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8
-; CHECK-NEXT: call void @use() #[[ATTR3]]
-; CHECK-NEXT: call void @use3(ptr [[X]], ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2)) #[[ATTR3]]
-; CHECK-NEXT: ret void
+; LE-MEMDEP-LABEL: define void @test_escape1(
+; LE-MEMDEP-SAME: ) #[[ATTR3]] {
+; LE-MEMDEP-NEXT: [[X:%.*]] = alloca ptr, align 8
+; LE-MEMDEP-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8
+; LE-MEMDEP-NEXT: call void @use() #[[ATTR3]]
+; LE-MEMDEP-NEXT: call void @use3(ptr [[X]], ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2)) #[[ATTR3]]
+; LE-MEMDEP-NEXT: ret void
+;
+; LE-MEMSSA-LABEL: define void @test_escape1(
+; LE-MEMSSA-SAME: ) #[[ATTR3]] {
+; LE-MEMSSA-NEXT: [[X:%.*]] = alloca ptr, align 8
+; LE-MEMSSA-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8
+; LE-MEMSSA-NEXT: call void @use() #[[ATTR3]]
+; LE-MEMSSA-NEXT: [[DEAD:%.*]] = load ptr, ptr [[X]], align 8
+; LE-MEMSSA-NEXT: call void @use3(ptr [[X]], ptr [[DEAD]]) #[[ATTR3]]
+; LE-MEMSSA-NEXT: ret void
+;
+; BE-MEMDEP-LABEL: define void @test_escape1(
+; BE-MEMDEP-SAME: ) #[[ATTR3]] {
+; BE-MEMDEP-NEXT: [[X:%.*]] = alloca ptr, align 8
+; BE-MEMDEP-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8
+; BE-MEMDEP-NEXT: call void @use() #[[ATTR3]]
+; BE-MEMDEP-NEXT: call void @use3(ptr [[X]], ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2)) #[[ATTR3]]
+; BE-MEMDEP-NEXT: ret void
+;
+; BE-MEMSSA-LABEL: define void @test_escape1(
+; BE-MEMSSA-SAME: ) #[[ATTR3]] {
+; BE-MEMSSA-NEXT: [[X:%.*]] = alloca ptr, align 8
+; BE-MEMSSA-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8
+; BE-MEMSSA-NEXT: call void @use() #[[ATTR3]]
+; BE-MEMSSA-NEXT: [[DEAD:%.*]] = load ptr, ptr [[X]], align 8
+; BE-MEMSSA-NEXT: call void @use3(ptr [[X]], ptr [[DEAD]]) #[[ATTR3]]
+; BE-MEMSSA-NEXT: ret void
;
%x = alloca ptr, align 8
store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr %x, align 8
@@ -1276,3 +2393,6 @@ define void @test_escape1() nounwind {
call void @use3(ptr %x, ptr %DEAD) nounwind
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; BE: {{.*}}
+; LE: {{.*}}
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
index b57a45f..8a608a1 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll
@@ -7,10 +7,10 @@ target triple = "aarch64"
declare void @streaming_compatible_f() #0 "aarch64_pstate_sm_compatible"
-; Function @streaming_callee doesn't contain any operations that may use ZA
+; Function @non_streaming_callee doesn't contain any operations that may use ZA
; state and therefore can be legally inlined into a normal function.
-define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_callee
+define void @non_streaming_callee() #0 {
+; CHECK-LABEL: define void @non_streaming_callee
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void @streaming_compatible_f()
; CHECK-NEXT: call void @streaming_compatible_f()
@@ -21,26 +21,26 @@ define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" {
ret void
}
-; Inline call to @streaming_callee to remove a streaming mode change.
-define void @non_streaming_caller_inline() #0 {
-; CHECK-LABEL: define void @non_streaming_caller_inline
+; Inline call to @non_streaming_callee to remove a streaming mode change.
+define void @streaming_caller_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_inline
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: call void @streaming_compatible_f()
; CHECK-NEXT: call void @streaming_compatible_f()
; CHECK-NEXT: ret void
;
- call void @streaming_callee()
+ call void @non_streaming_callee()
ret void
}
-; Don't inline call to @streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change.
-define void @streaming_caller_dont_inline() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_dont_inline
+; Don't inline call to @non_streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change.
+define void @non_streaming_caller_dont_inline() #0 {
+; CHECK-LABEL: define void @non_streaming_caller_dont_inline
; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT: call void @streaming_callee()
+; CHECK-NEXT: call void @non_streaming_callee()
; CHECK-NEXT: ret void
;
- call void @streaming_callee()
+ call void @non_streaming_callee()
ret void
}
diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
index 6cb1692..077a3aa 100644
--- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
+++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll
@@ -86,7 +86,7 @@ entry:
; [ ] N -> SC + B
define i32 @normal_caller_normal_callee_inline() #0 {
; CHECK-LABEL: define i32 @normal_caller_normal_callee_inline
-; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -103,7 +103,7 @@ entry:
; [ ] N -> SC + B
define i32 @normal_caller_streaming_callee_dont_inline() #0 {
; CHECK-LABEL: define i32 @normal_caller_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -120,7 +120,7 @@ entry:
; [ ] N -> SC + B
define i32 @normal_caller_streaming_compatible_callee_inline() #0 {
; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -137,7 +137,7 @@ entry:
; [ ] N -> SC + B
define i32 @normal_caller_locally_streaming_callee_dont_inline() #0 {
; CHECK-LABEL: define i32 @normal_caller_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @locally_streaming_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -154,7 +154,7 @@ entry:
; [x] N -> SC + B
define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0 {
; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -171,7 +171,7 @@ entry:
; [ ] S -> SC + B
define i32 @streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i32 @streaming_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -188,7 +188,7 @@ entry:
; [ ] S -> SC + B
define i32 @streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i32 @streaming_caller_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -205,7 +205,7 @@ entry:
; [ ] S -> SC + B
define i32 @streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -222,7 +222,7 @@ entry:
; [ ] S -> SC + B
define i32 @streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i32 @streaming_caller_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -239,7 +239,7 @@ entry:
; [x] S -> SC + B
define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -256,7 +256,7 @@ entry:
; [ ] N + B -> SC + B
define i32 @locally_streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @locally_streaming_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -273,7 +273,7 @@ entry:
; [ ] N + B -> SC + B
define i32 @locally_streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -290,7 +290,7 @@ entry:
; [ ] N + B -> SC + B
define i32 @locally_streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -307,7 +307,7 @@ entry:
; [ ] N + B -> SC + B
define i32 @locally_streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @locally_streaming_caller_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -324,7 +324,7 @@ entry:
; [x] N + B -> SC + B
define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR8]] {
+; CHECK-SAME: () #[[ATTR3]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -341,7 +341,7 @@ entry:
; [ ] SC -> SC + B
define i32 @streaming_compatible_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: define i32 @streaming_compatible_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -358,7 +358,7 @@ entry:
; [ ] SC -> SC + B
define i32 @streaming_compatible_caller_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -375,7 +375,7 @@ entry:
; [ ] SC -> SC + B
define i32 @streaming_compatible_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -392,7 +392,7 @@ entry:
; [ ] SC -> SC + B
define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @locally_streaming_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -409,7 +409,7 @@ entry:
; [x] SC -> SC + B
define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline
-; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -425,7 +425,7 @@ entry:
; [ ] SC + B -> SC + B
define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline
-; CHECK-SAME: () #[[ATTR10:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee()
; CHECK-NEXT: ret i32 [[RES]]
@@ -442,7 +442,7 @@ entry:
; [ ] SC + B -> SC + B
define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -459,7 +459,7 @@ entry:
; [ ] SC + B -> SC + B
define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -476,7 +476,7 @@ entry:
; [ ] SC + B -> SC + B
define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -493,7 +493,7 @@ entry:
; [x] SC + B -> SC + B
define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" {
; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline
-; CHECK-SAME: () #[[ATTR10]] {
+; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: ret i32 [[RES_I]]
@@ -505,7 +505,7 @@ entry:
define void @normal_callee_with_inlineasm() #0 {
; CHECK-LABEL: define void @normal_callee_with_inlineasm
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: ret void
@@ -517,7 +517,7 @@ entry:
define void @streaming_caller_normal_callee_with_inlineasm_dont_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @normal_callee_with_inlineasm()
; CHECK-NEXT: ret void
@@ -529,7 +529,7 @@ entry:
define i64 @normal_callee_with_intrinsic_call() #0 {
; CHECK-LABEL: define i64 @normal_callee_with_intrinsic_call
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.aarch64.sve.cntb(i32 4)
; CHECK-NEXT: ret i64 [[RES]]
@@ -541,7 +541,7 @@ entry:
define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i64 @normal_callee_with_intrinsic_call()
; CHECK-NEXT: ret i64 [[RES]]
@@ -555,7 +555,7 @@ declare i64 @llvm.aarch64.sve.cntb(i32)
define i64 @normal_callee_call_sme_state() #0 {
; CHECK-LABEL: define i64 @normal_callee_call_sme_state
-; CHECK-SAME: () #[[ATTR6]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call { i64, i64 } @__arm_sme_state()
; CHECK-NEXT: [[RES_0:%.*]] = extractvalue { i64, i64 } [[RES]], 0
@@ -571,7 +571,7 @@ declare {i64, i64} @__arm_sme_state()
define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline() #0 "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline
-; CHECK-SAME: () #[[ATTR7]] {
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RES:%.*]] = call i64 @normal_callee_call_sme_state()
; CHECK-NEXT: ret i64 [[RES]]
@@ -583,57 +583,57 @@ entry:
-declare void @streaming_body() "aarch64_pstate_sm_enabled"
+declare void @nonstreaming_body()
-define void @streaming_caller_single_streaming_callee() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_callee
-; CHECK-SAME: () #[[ATTR7]] {
-; CHECK-NEXT: call void @streaming_body()
+define void @nonstreaming_caller_single_nonstreaming_callee() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_single_nonstreaming_callee
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT: call void @nonstreaming_body()
; CHECK-NEXT: ret void
;
- call void @streaming_body()
+ call void @nonstreaming_body()
ret void
}
-define void @streaming_caller_multiple_streaming_callees() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees
-; CHECK-SAME: () #[[ATTR7]] {
-; CHECK-NEXT: call void @streaming_body()
-; CHECK-NEXT: call void @streaming_body()
+define void @nonstreaming_caller_multiple_nonstreaming_callees() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_multiple_nonstreaming_callees
+; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-NEXT: call void @nonstreaming_body()
+; CHECK-NEXT: call void @nonstreaming_body()
; CHECK-NEXT: ret void
;
- call void @streaming_body()
- call void @streaming_body()
+ call void @nonstreaming_body()
+ call void @nonstreaming_body()
ret void
}
; Allow inlining, as inline it would not increase the number of streaming-mode changes.
-define void @streaming_caller_single_streaming_callee_inline() #0 {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline
-; CHECK-SAME: () #[[ATTR6]] {
-; CHECK-NEXT: call void @streaming_body()
+define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT: call void @nonstreaming_body()
; CHECK-NEXT: ret void
;
- call void @streaming_caller_single_streaming_callee()
+ call void @nonstreaming_caller_single_nonstreaming_callee()
ret void
}
-; Prevent inlining, as inline it would lead to multiple streaming-mode changes.
-define void @streaming_caller_multiple_streaming_callees_dont_inline() #0 {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline
-; CHECK-SAME: () #[[ATTR6]] {
-; CHECK-NEXT: call void @streaming_caller_multiple_streaming_callees()
+; Prevent inlining of @nonstreaming_caller_multiple_nonstreaming_callees, as inlining it would lead to multiple streaming-mode changes.
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT: call void @nonstreaming_caller_multiple_nonstreaming_callees()
; CHECK-NEXT: ret void
;
- call void @streaming_caller_multiple_streaming_callees()
+ call void @nonstreaming_caller_multiple_nonstreaming_callees()
ret void
}
declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible"
-define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee
-; CHECK-SAME: () #[[ATTR7]] {
+define void @nonstreaming_caller_single_streaming_compatible_callee() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_single_streaming_compatible_callee
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @streaming_compatible_body()
; CHECK-NEXT: ret void
;
@@ -641,9 +641,9 @@ define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_
ret void
}
-define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch64_pstate_sm_enabled" {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees
-; CHECK-SAME: () #[[ATTR7]] {
+define void @nonstreaming_caller_multiple_streaming_compatible_callees() #0 {
+; CHECK-LABEL: define void @nonstreaming_caller_multiple_streaming_compatible_callees
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @streaming_compatible_body()
; CHECK-NEXT: call void @streaming_compatible_body()
; CHECK-NEXT: ret void
@@ -654,25 +654,67 @@ define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch
}
; Allow inlining, as inline would remove a streaming-mode change.
-define void @streaming_caller_single_streaming_compatible_callee_inline() #0 {
-; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline
-; CHECK-SAME: () #[[ATTR6]] {
+define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: call void @streaming_compatible_body()
; CHECK-NEXT: ret void
;
- call void @streaming_caller_single_streaming_compatible_callee()
+ call void @nonstreaming_caller_single_streaming_compatible_callee()
ret void
}
-; Allow inlining, as inline would remove several stremaing-mode changes.
-define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0 {
-; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline
-; CHECK-SAME: () #[[ATTR6]] {
+; Allow inlining, as inlining would remove several streaming-mode changes.
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline() #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline
+; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: call void @streaming_compatible_body()
; CHECK-NEXT: call void @streaming_compatible_body()
; CHECK-NEXT: ret void
;
- call void @streaming_caller_multiple_streaming_compatible_callees()
+ call void @nonstreaming_caller_multiple_streaming_compatible_callees()
+ ret void
+}
+
+define void @simple_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_enabled" {
+; CHECK-LABEL: define void @simple_streaming_function
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT: ret void
+;
+ store <vscale x 4 x i32> zeroinitializer, ptr %ptr
+ ret void
+}
+
+; Don't allow inlining a streaming function into a non-streaming function.
+define void @non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) #0 {
+; CHECK-LABEL: define void @non_streaming_caller_streaming_callee_dont_inline
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: call void @simple_streaming_function(ptr [[PTR]])
+; CHECK-NEXT: ret void
+;
+ call void @simple_streaming_function(ptr %ptr)
+ ret void
+}
+
+define void @simple_locally_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_body" {
+; CHECK-LABEL: define void @simple_locally_streaming_function
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16
+; CHECK-NEXT: ret void
+;
+ store <vscale x 4 x i32> zeroinitializer, ptr %ptr
+ ret void
+}
+
+; Don't allow inlining a locally-streaming function into a non-streaming function.
+define void @non_streaming_caller_locally_streaming_callee_dont_inline(ptr %ptr) #0 {
+; CHECK-LABEL: define void @non_streaming_caller_locally_streaming_callee_dont_inline
+; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: call void @simple_locally_streaming_function(ptr [[PTR]])
+; CHECK-NEXT: ret void
+;
+ call void @simple_locally_streaming_function(ptr %ptr)
ret void
}
diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
index a92e0c2..e2f22b8 100644
--- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
+++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll
@@ -293,3 +293,183 @@ entry:
%p2 = getelementptr <vscale x 4 x i32>, ptr %p1, i64 %index
ret ptr %p2
}
+
+define ptr @test_all_nuw(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_nuw(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr nuw i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_partial_nuw1(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_partial_nuw1(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_partial_nuw2(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_partial_nuw2(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr nuw i8, ptr %base, i64 1
+ %index = add i64 %a, 2
+ %p2 = getelementptr nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_partial_nuw3(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_partial_nuw3(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr nuw i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_nuw_disjoint(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_nuw_disjoint(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr nuw i8, ptr %base, i64 1
+ %index = or disjoint i64 %a, 2
+ %p2 = getelementptr nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_inbounds_nuw(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_inbounds_nuw(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_partial_inbounds1(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_partial_inbounds1(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr nuw i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_partial_inbounds2(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_partial_inbounds2(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_inbounds_partial_nuw1(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_inbounds_partial_nuw1(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 7
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr inbounds i8, ptr %base, i64 -1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_inbounds_partial_nuw2(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_inbounds_partial_nuw2(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1
+ %index = add nuw i64 %a, 2
+ %p2 = getelementptr inbounds i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_inbounds_partial_nuw3(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_inbounds_partial_nuw3(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1
+ %index = add i64 %a, 2
+ %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
+
+define ptr @test_all_nusw_nuw(ptr %base, i64 %a) {
+; CHECK-LABEL: define ptr @test_all_nusw_nuw(
+; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9
+; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]]
+; CHECK-NEXT: ret ptr [[P2]]
+;
+entry:
+ %p1 = getelementptr nusw nuw i8, ptr %base, i64 1
+ %index = add nsw nuw i64 %a, 2
+ %p2 = getelementptr nusw nuw i32, ptr %p1, i64 %index
+ ret ptr %p2
+}
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index df34e7d..f44d27c 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -68,7 +68,6 @@ define i1 @test1_noinbounds_as1(i32 %x) {
%q = load i16, ptr addrspace(1) %p
%r = icmp eq i16 %q, 0
ret i1 %r
-
}
define i1 @test1_noinbounds_as2(i64 %x) {
@@ -81,7 +80,17 @@ define i1 @test1_noinbounds_as2(i64 %x) {
%q = load i16, ptr addrspace(2) %p
%r = icmp eq i16 %q, 0
ret i1 %r
+}
+define i1 @test1_noarrayty(i32 %X) {
+; CHECK-LABEL: @test1_noarrayty(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds i16, ptr @G16, i32 %X
+ %Q = load i16, ptr %P
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
}
define i1 @test2(i32 %X) {
@@ -104,7 +113,17 @@ define i1 @test3(i32 %X) {
%Q = load double, ptr %P
%R = fcmp oeq double %Q, 1.0
ret i1 %R
+}
+define i1 @test3_noarrayty(i32 %X) {
+; CHECK-LABEL: @test3_noarrayty(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds double, ptr @GD, i32 %X
+ %Q = load double, ptr %P
+ %R = fcmp oeq double %Q, 1.0
+ ret i1 %R
}
define i1 @test4(i32 %X) {
@@ -325,6 +344,17 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
ret i1 %r
}
+define i1 @test10_struct_arr_noarrayty(i32 %x) {
+; CHECK-LABEL: @test10_struct_arr_noarrayty(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr inbounds %Foo, ptr @GStructArr, i32 %x, i32 2
+ %q = load i32, ptr %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
@table = internal constant [2 x ptr] [ptr @g, ptr getelementptr (i8, ptr @g, i64 4)], align 16
@g = external global [2 x i32]
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index d9f729e..3454835 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -3025,3 +3025,31 @@ join:
%umax = call noundef i32 @llvm.umax(i32 noundef %phi, i32 1)
ret i32 %umax
}
+
+define i32 @multiple_intrinsics_with_multiple_phi_uses(i1 %c, i32 %arg) {
+; CHECK-LABEL: @multiple_intrinsics_with_multiple_phi_uses(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[IF_END:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG:%.*]], -8
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[ADD]], i32 [[ADD]], i32 29)
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 1
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br i1 %c, label %if, label %if.end
+
+if:
+ %add = add i32 %arg, -8
+ br label %if.end
+
+if.end:
+ %phi = phi i32 [ %add, %if ], [ 0, %entry ]
+ %fshl1 = call i32 @llvm.fshl.i32(i32 %phi, i32 %phi, i32 29)
+ %fshl2 = call i32 @llvm.fshl.i32(i32 %phi, i32 %phi, i32 29)
+ %add2 = add i32 %fshl1, %fshl2
+ ret i32 %add2
+}
diff --git a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
new file mode 100644
index 0000000..1e089e1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+define i1 @extract_const_idx(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: define i1 @extract_const_idx(
+; CHECK-SAME: <vscale x 4 x i1> [[A:%.*]]) {
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <vscale x 4 x i1> [[A]], i64 1
+; CHECK-NEXT: ret i1 [[ELT]]
+;
+ %subvec = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1.i64(<vscale x 4 x i1> %a, i64 0)
+ %elt = extractelement <vscale x 2 x i1> %subvec, i32 1
+ ret i1 %elt
+}
+
+define float @extract_variable_idx(<vscale x 4 x float> %a, i32 %idx) {
+; CHECK-LABEL: define float @extract_variable_idx(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], i32 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <vscale x 4 x float> [[A]], i32 [[IDX]]
+; CHECK-NEXT: ret float [[ELT]]
+;
+ %subvec = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32.i64(<vscale x 4 x float> %a, i64 0)
+ %elt = extractelement <vscale x 2 x float> %subvec, i32 %idx
+ ret float %elt
+}
+
+define float @negative_test(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define float @negative_test(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT: [[SUBVEC:%.*]] = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> [[A]], i64 2)
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <vscale x 2 x float> [[SUBVEC]], i64 1
+; CHECK-NEXT: ret float [[ELT]]
+;
+ %subvec = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32.i64(<vscale x 4 x float> %a, i64 2)
+ %elt = extractelement <vscale x 2 x float> %subvec, i32 1
+ ret float %elt
+}
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
index 75b8509..6eed7f8 100644
--- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll
@@ -416,6 +416,54 @@ define float @test_round_ftz_f_neg_1_5() {
ret float %res
}
+define double @test_round_d_2_5() {
+; CHECK-LABEL: define double @test_round_d_2_5() {
+; CHECK-NEXT: ret double 2.000000e+00
+;
+ %res = call double @llvm.nvvm.round.d(double 2.5)
+ ret double %res
+}
+
+define float @test_round_f_2_5() {
+; CHECK-LABEL: define float @test_round_f_2_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.f(float 2.5)
+ ret float %res
+}
+
+define float @test_round_ftz_f_2_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_2_5() {
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.ftz.f(float 2.5)
+ ret float %res
+}
+
+define double @test_round_d_neg_2_5() {
+; CHECK-LABEL: define double @test_round_d_neg_2_5() {
+; CHECK-NEXT: ret double -2.000000e+00
+;
+ %res = call double @llvm.nvvm.round.d(double -2.5)
+ ret double %res
+}
+
+define float @test_round_f_neg_2_5() {
+; CHECK-LABEL: define float @test_round_f_neg_2_5() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.f(float -2.5)
+ ret float %res
+}
+
+define float @test_round_ftz_f_neg_2_5() {
+; CHECK-LABEL: define float @test_round_ftz_f_neg_2_5() {
+; CHECK-NEXT: ret float -2.000000e+00
+;
+ %res = call float @llvm.nvvm.round.ftz.f(float -2.5)
+ ret float %res
+}
+
define double @test_round_d_neg_subnorm() {
; CHECK-LABEL: define double @test_round_d_neg_subnorm() {
; CHECK-NEXT: ret double -0.000000e+00
diff --git a/llvm/test/Transforms/LICM/gep-reassociate.ll b/llvm/test/Transforms/LICM/gep-reassociate.ll
index 630a751..0090c76 100644
--- a/llvm/test/Transforms/LICM/gep-reassociate.ll
+++ b/llvm/test/Transforms/LICM/gep-reassociate.ll
@@ -39,11 +39,13 @@ exit:
ret void
}
-define void @both_inbounds_one_neg(ptr %ptr, i1 %c) {
+define void @both_inbounds_one_neg(ptr %ptr, i1 %c, i64 %neg) {
; CHECK-LABEL: define void @both_inbounds_one_neg
-; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) {
+; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[NEG:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1
+; CHECK-NEXT: [[IS_NEG:%.*]] = icmp slt i64 [[NEG]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NEG]])
+; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[NEG]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32()
@@ -55,13 +57,15 @@ define void @both_inbounds_one_neg(ptr %ptr, i1 %c) {
; CHECK-NEXT: ret void
;
entry:
+ %is.neg = icmp slt i64 %neg, 0
+ call void @llvm.assume(i1 %is.neg)
br label %loop
loop:
%val = call i32 @get.i32()
%val.ext = zext i32 %val to i64
%ptr2 = getelementptr inbounds i8, ptr %ptr, i64 %val.ext
- %ptr3 = getelementptr i8, ptr %ptr2, i64 -1
+ %ptr3 = getelementptr i8, ptr %ptr2, i64 %neg
call void @use(ptr %ptr3)
br i1 %c, label %loop, label %exit
@@ -69,11 +73,13 @@ exit:
ret void
}
-define void @both_inbounds_pos(ptr %ptr, i1 %c) {
+define void @both_inbounds_pos(ptr %ptr, i1 %c, i64 %nonneg) {
; CHECK-LABEL: define void @both_inbounds_pos
-; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) {
+; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[NONNEG:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
+; CHECK-NEXT: [[IS_NONNEG:%.*]] = icmp sge i64 [[NONNEG]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NONNEG]])
+; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[NONNEG]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32()
@@ -85,13 +91,15 @@ define void @both_inbounds_pos(ptr %ptr, i1 %c) {
; CHECK-NEXT: ret void
;
entry:
+ %is.nonneg = icmp sge i64 %nonneg, 0
+ call void @llvm.assume(i1 %is.nonneg)
br label %loop
loop:
%val = call i32 @get.i32()
%val.ext = zext i32 %val to i64
%ptr2 = getelementptr inbounds i8, ptr %ptr, i64 %val.ext
- %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 1
+ %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 %nonneg
call void @use(ptr %ptr3)
br i1 %c, label %loop, label %exit
@@ -440,3 +448,32 @@ latch:
exit:
ret void
}
+
+; Do not reassociate constant offset GEP.
+define void @constant_offset(ptr %ptr, i1 %c) {
+; CHECK-LABEL: define void @constant_offset
+; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64()
+; CHECK-NEXT: [[GEP_BASE:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[VAL]]
+; CHECK-NEXT: [[GEP_OFF:%.*]] = getelementptr i8, ptr [[GEP_BASE]], i64 1
+; CHECK-NEXT: call void @use(ptr [[GEP_OFF]])
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %val = call i64 @get.i64()
+ %gep.base = getelementptr i8, ptr %ptr, i64 %val
+ %gep.off = getelementptr i8, ptr %gep.base, i64 1
+ call void @use(ptr %gep.off)
+ br i1 %c, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
index aa954aeb..9003072 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll
@@ -383,14 +383,14 @@ define void @vscale_squared_offset(ptr %alloc) #0 {
; COMMON-LABEL: vscale_squared_offset:
; COMMON: // %bb.0: // %entry
; COMMON-NEXT: rdvl x9, #1
+; COMMON-NEXT: rdvl x10, #4
; COMMON-NEXT: fmov z0.s, #4.00000000
-; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: lsr x9, x9, #4
; COMMON-NEXT: fmov z1.s, #8.00000000
-; COMMON-NEXT: cntw x10
+; COMMON-NEXT: mov x8, xzr
; COMMON-NEXT: ptrue p0.s, vl1
-; COMMON-NEXT: umull x9, w9, w9
-; COMMON-NEXT: lsl x9, x9, #6
+; COMMON-NEXT: umull x9, w9, w10
+; COMMON-NEXT: cntw x10
; COMMON-NEXT: cmp x8, x10
; COMMON-NEXT: b.ge .LBB6_2
; COMMON-NEXT: .LBB6_1: // %for.body
diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
index 953dc278..dd2913d 100644
--- a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
@@ -444,3 +444,158 @@ loop:
exit:
ret i64 %rdx.next
}
+
+define void @reduction_with_intermediate_store(ptr %src, ptr %sum) {
+; CHECK-LABEL: define void @reduction_with_intermediate_store(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]]
+; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[SUM]], align 4
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
+; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]]
+; CHECK-NEXT: store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
+; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]]
+; CHECK-NEXT: store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4
+; CHECK-NEXT: [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]]
+; CHECK-NEXT: store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4
+; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000
+; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %sum.promoted = load i32, ptr %sum, align 4
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red = phi i32 [ %sum.promoted, %entry ], [ %red.next, %loop ]
+ %gep.src = getelementptr inbounds nuw i32, ptr %src, i64 %iv
+ %l = load i32, ptr %gep.src, align 4
+ %red.next = add nsw i32 %red, %l
+ store i32 %red.next, ptr %sum, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 10000
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+declare i32 @foo()
+
+; Loop with a call cannot be handled by LoopVectorize, introducing additional
+; accumulators when unrolling increases throughput.
+define i32 @test_add_with_call(i64 %n, i32 %start) {
+; CHECK-LABEL: define i32 @test_add_with_call(
+; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[L:%.*]] = call i32 @foo()
+; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
+; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo()
+; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
+; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo()
+; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
+; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo()
+; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
+; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
+; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[BIN_RDX2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
+ %iv.next = add i64 %iv, 1
+ %l = call i32 @foo()
+ %rdx.next = add i32 %rdx, %l
+ %ec = icmp ne i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret i32 %rdx.next
+}
+
+; Loop with backward dependence cannot be handled by LoopVectorize, introducing additional
+; accumulators when unrolling increases throughput.
+define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) {
+; CHECK-LABEL: define i32 @test_add_with_backward_dep(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
+; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4
+; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4
+; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4
+; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4
+; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4
+; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
+; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4
+; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4
+; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
+; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
+; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[BIN_RDX3]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
+ %iv.next = add i64 %iv, 1
+ %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv
+ %l = load i32, ptr %gep
+ %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next
+ store i32 0, ptr %gep.1
+ %rdx.next = add i32 %rdx, %l
+ %ec = icmp ne i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret i32 %rdx.next
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
new file mode 100644
index 0000000..8495dee
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \
+; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s
+
+; FIXME: Hoisted vector code should be costed with scalable cost.
+; In this example, `<vscale x 4 x float> @llvm.minimumnum` has an invalid cost,
+; and hence should not be produced by LoopVectorize.
+
+; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00)
+define void @cost_hoisted_vector_code(ptr %p, float %arg) {
+; CHECK-LABEL: define void @cost_hoisted_vector_code(
+; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[ARG]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[BROADCAST_SPLAT]], <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP8]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
+ %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.0)
+ %gep.p.red = getelementptr float, ptr %p, i64 %iv
+ store float %res, ptr %gep.p.red, align 4
+ %iv.next = add i64 %iv, 1
+ %exit.cond = icmp eq i64 %iv.next, 0
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+declare float @llvm.minimumnum.f32(float, float)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
index 7308129..a11896a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll
@@ -100,9 +100,9 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP8]]
; DATA_NO_LANEMASK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; DATA_NO_LANEMASK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
; DATA_NO_LANEMASK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DATA_NO_LANEMASK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
+; DATA_NO_LANEMASK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX]], 1
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; DATA_NO_LANEMASK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[VAL:%.*]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
index 21266e5..162440a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
@@ -1,6 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFBFMIN
-; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFBFMIN-PREDICATED
+; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFBFMIN
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -S | FileCheck %s -check-prefix=ZVFBFMIN
define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
@@ -22,24 +22,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; NO-ZVFBFMIN: [[EXIT]]:
; NO-ZVFBFMIN-NEXT: ret void
;
-; NO-ZVFBFMIN-PREDICATED-LABEL: define void @fadd(
-; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
-; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd bfloat [[X]], [[Y]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
-; NO-ZVFBFMIN-PREDICATED: [[EXIT]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: ret void
-;
; ZVFBFMIN-LABEL: define void @fadd(
; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; ZVFBFMIN-NEXT: [[ENTRY:.*]]:
@@ -152,54 +134,6 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
; NO-ZVFBFMIN: [[EXIT]]:
; NO-ZVFBFMIN-NEXT: ret void
;
-; NO-ZVFBFMIN-PREDICATED-LABEL: define void @vfwmaccbf16.vv(
-; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]]
-; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <4 x bfloat>, ptr [[A_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = load <4 x bfloat>, ptr [[B_GEP]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = load <4 x float>, ptr [[C_GEP]], align 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD]] to <4 x float>
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP5:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD3]] to <4 x float>
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[WIDE_MASKED_LOAD4]])
-; NO-ZVFBFMIN-PREDICATED-NEXT: store <4 x float> [[TMP6]], ptr [[C_GEP]], align 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[I]], 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
-; NO-ZVFBFMIN-PREDICATED: [[LOOP]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I1]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP1:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I1]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP1:%.*]] = getelementptr float, ptr [[C]], i64 [[I1]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP1]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP1]], align 2
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = load float, ptr [[C_GEP1]], align 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[X_EXT:%.*]] = fpext bfloat [[X]] to float
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]])
-; NO-ZVFBFMIN-PREDICATED-NEXT: store float [[FMULADD]], ptr [[C_GEP1]], align 4
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I1]], 1
-; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
-; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
-; NO-ZVFBFMIN-PREDICATED: [[EXIT]]:
-; NO-ZVFBFMIN-PREDICATED-NEXT: ret void
-;
; ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv(
; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; ZVFBFMIN-NEXT: [[ENTRY:.*]]:
@@ -274,21 +208,3 @@ loop:
exit:
ret void
}
-;.
-; NO-ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; NO-ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; NO-ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; NO-ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
-; NO-ZVFBFMIN-PREDICATED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; NO-ZVFBFMIN-PREDICATED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; NO-ZVFBFMIN-PREDICATED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; NO-ZVFBFMIN-PREDICATED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
-; ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; ZVFBFMIN: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; ZVFBFMIN: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll
index 53e43e1..effaf57 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll
@@ -1,6 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFHMIN
-; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFHMIN-PREDICATED
+; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFHMIN
; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -S | FileCheck %s -check-prefix=ZVFHMIN
define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
@@ -22,24 +22,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; NO-ZVFHMIN: [[EXIT]]:
; NO-ZVFHMIN-NEXT: ret void
;
-; NO-ZVFHMIN-PREDICATED-LABEL: define void @fadd(
-; NO-ZVFHMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; NO-ZVFHMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
-; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
-; NO-ZVFHMIN-PREDICATED: [[LOOP]]:
-; NO-ZVFHMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
-; NO-ZVFHMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]]
-; NO-ZVFHMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]]
-; NO-ZVFHMIN-PREDICATED-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2
-; NO-ZVFHMIN-PREDICATED-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2
-; NO-ZVFHMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]]
-; NO-ZVFHMIN-PREDICATED-NEXT: store half [[Z]], ptr [[A_GEP]], align 2
-; NO-ZVFHMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1
-; NO-ZVFHMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
-; NO-ZVFHMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
-; NO-ZVFHMIN-PREDICATED: [[EXIT]]:
-; NO-ZVFHMIN-PREDICATED-NEXT: ret void
-;
; ZVFHMIN-LABEL: define void @fadd(
; ZVFHMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; ZVFHMIN-NEXT: [[ENTRY:.*]]:
@@ -86,6 +68,23 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; ZVFHMIN: [[EXIT]]:
; ZVFHMIN-NEXT: ret void
;
+; NO-ZVFHMIN-PREDICATED-LABEL: define void @fadd(
+; NO-ZVFHMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-ZVFHMIN-PREDICATED-NEXT: [[ENTRY:.*]]:
+; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[LOOP:.*]]
+; NO-ZVFHMIN-PREDICATED: [[LOOP]]:
+; NO-ZVFHMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
+; NO-ZVFHMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]]
+; NO-ZVFHMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]]
+; NO-ZVFHMIN-PREDICATED-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2
+; NO-ZVFHMIN-PREDICATED-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2
+; NO-ZVFHMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]]
+; NO-ZVFHMIN-PREDICATED-NEXT: store half [[Z]], ptr [[A_GEP]], align 2
+; NO-ZVFHMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1
+; NO-ZVFHMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
+; NO-ZVFHMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
+; NO-ZVFHMIN-PREDICATED: [[EXIT]]:
+; NO-ZVFHMIN-PREDICATED-NEXT: ret void
entry:
br label %loop
loop:
@@ -102,9 +101,3 @@ loop:
exit:
ret void
}
-;.
-; ZVFHMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; ZVFHMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; ZVFHMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; ZVFHMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll
new file mode 100644
index 0000000..c5396f2
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -mtriple=riscv64 -mattr=+v -passes=loop-vectorize \
+; RUN: -scalable-vectorization=off -enable-masked-interleaved-mem-accesses \
+; RUN: -force-vector-interleave=1 -riscv-v-vector-bits-min=1024 -S < %s | FileCheck %s
+
+define void @store_factor_2_with_tail_gap(i64 %n, ptr %a) {
+; CHECK-LABEL: define void @store_factor_2_with_tail_gap(
+; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VEC_IND]], <16 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT: call void @llvm.masked.store.v32i64.p0(<32 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], i32 8, <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
+; CHECK-NEXT: store i64 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %0 = shl nsw i64 %iv, 1
+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %0
+ store i64 %iv, ptr %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
new file mode 100644
index 0000000..554ce7b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
@@ -0,0 +1,1481 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt < %s -p loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s
+
+; Reduction can be vectorized
+
+; ADD
+
+define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @add(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP7]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP10]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 %add
+}
+
+; OR
+
+define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @or(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7]] = or <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP7]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[OR:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[OR]] = or i32 [[TMP10]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[OR_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %or = or i32 %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 %or
+}
+
+; AND
+
+define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @and(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> splat (i32 -1), i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7]] = and <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP7]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[AND:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[AND]] = and i32 [[TMP10]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[AND_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %and = and i32 %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 %and
+}
+
+; XOR
+
+define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @xor(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7]] = xor <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP7]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[XOR]] = xor i32 [[TMP10]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[XOR_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %xor = xor i32 %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 %xor
+}
+
+; SMIN
+
+define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @smin(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ splat (i32 2), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP11]], [[SUM_010]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[SUM_010]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi i32 [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %cmp.i = icmp slt i32 %0, %sum.010
+ %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret i32 %.sroa.speculated
+}
+
+; UMAX
+
+define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @umax(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ splat (i32 2), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP11]], [[SUM_010]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[SUM_010]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi i32 [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %cmp.i = icmp ugt i32 %0, %sum.010
+ %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret i32 %.sroa.speculated
+}
+
+; FADD (FAST)
+
+define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
+; CHECK-LABEL: define float @fadd_fast(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7]] = fadd fast <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP7]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP10]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret float [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %add = fadd fast float %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret float %add
+}
+
+; Input: a scalar loop that loads one half from %a per iteration (indexed by
+; %iv, starting at 0) and accumulates it into a sum that starts at 0.0 using
+; `fadd fast`. The loop exits when %iv.next equals %n. The function carries the
+; "+zvfh" target feature.
+; Expected output: the loop is vectorized with one <vscale x 8 x half>
+; accumulator stepping by vscale * 8, the accumulator is folded with
+; llvm.vector.reduce.fadd in the middle block, and the original scalar loop is
+; kept as the remainder loop.
+; NOTE(review): the half load is marked `align 4` although the pointer is
+; indexed per half element -- possibly carried over from the float variant;
+; confirm this is intentional.
+define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfh" {
+; CHECK-LABEL: define half @fadd_fast_half_zvfh(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7]] = fadd fast <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP9:%.*]] = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> [[TMP7]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP10]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret half [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
+ %0 = load half, ptr %arrayidx, align 4
+ %add = fadd fast half %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret half %add
+}
+
+; Input: the same `fadd fast` half accumulation loop over %a as the "+zvfh"
+; variant, but the function carries only the "+zvfhmin" target feature.
+; Expected output: the loop is vectorized with fixed-width <16 x half> vectors
+; and two independent accumulators (32 elements per vector iteration). The two
+; partial sums are added together in the middle block before the
+; llvm.vector.reduce.fadd call, and the scalar loop is kept as the remainder.
+; NOTE(review): the half loads are marked `align 4` although the pointer is
+; indexed per half element -- possibly carried over from the float variant;
+; confirm this is intentional.
+define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfhmin" {
+; CHECK-LABEL: define half @fadd_fast_half_zvfhmin(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = fadd fast <16 x half> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP3]] = fadd fast <16 x half> [[WIDE_LOAD2]], [[VEC_PHI1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x half> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP6]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret half [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
+ %0 = load half, ptr %arrayidx, align 4
+ %add = fadd fast half %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret half %add
+}
+
+; Input: a scalar loop that loads one bfloat from %a per iteration (indexed by
+; %iv, starting at 0) and accumulates it into a sum that starts at 0.0 using
+; `fadd fast`. The function carries the "+zvfbfmin" target feature.
+; Expected output: the loop is vectorized with fixed-width <16 x bfloat>
+; vectors and two independent accumulators (32 elements per vector iteration).
+; The two partial sums are added together in the middle block before the
+; llvm.vector.reduce.fadd call, and the scalar loop is kept as the remainder.
+; NOTE(review): the bfloat loads are marked `align 4` although the pointer is
+; indexed per bfloat element -- possibly carried over from the float variant;
+; confirm this is intentional.
+define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfbfmin" {
+; CHECK-LABEL: define bfloat @fadd_fast_bfloat(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = fadd fast <16 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP3]] = fadd fast <16 x bfloat> [[WIDE_LOAD2]], [[VEC_PHI1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x bfloat> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = fadd fast bfloat [[TMP6]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi bfloat [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret bfloat [[ADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
+ %0 = load bfloat, ptr %arrayidx, align 4
+ %add = fadd fast bfloat %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret bfloat %add
+}
+
+; FMIN (FAST)
+
+; Input: a scalar loop that loads one float from %a per iteration and keeps the
+; smaller of the loaded value and the running value (which starts at 0.0) via
+; `fcmp olt` + `select`. The function uses attribute group #0, whose
+; definition is outside this excerpt.
+; Expected output: the loop is vectorized with one <vscale x 4 x float>
+; accumulator stepping by vscale * 4, the accumulator is folded with
+; llvm.vector.reduce.fmin in the middle block, and the scalar loop is kept as
+; the remainder.
+; NOTE(review): despite the "fast" in the name, the compare here carries no
+; fast-math flags (the fmax counterpart uses `fcmp fast ogt`), and the expected
+; reduce call likewise has none -- confirm this asymmetry is intentional.
+define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
+; CHECK-LABEL: define float @fmin_fast(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt float [[TMP11]], [[SUM_07]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], float [[TMP11]], float [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi float [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret float [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %cmp.i = fcmp olt float %0, %sum.07
+ %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret float %.sroa.speculated
+}
+
+; Input: a scalar loop that loads one half from %a per iteration and keeps the
+; smaller of the loaded value and the running value (which starts at 0.0) via
+; `fcmp olt` + `select`. The function uses attribute group #1, whose
+; definition is outside this excerpt (presumably it enables zvfhmin, per the
+; function name -- verify against the attribute list).
+; Expected output: the loop is vectorized with one <vscale x 8 x half>
+; accumulator stepping by vscale * 8, the accumulator is folded with
+; llvm.vector.reduce.fmin in the middle block, and the scalar loop is kept as
+; the remainder.
+; NOTE(review): the compare carries no fast-math flags despite the "fast" in
+; the name, and the half loads are marked `align 4` although the pointer is
+; indexed per half element -- confirm both are intentional.
+define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
+; CHECK-LABEL: define half @fmin_fast_half_zvfhmin(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt half [[TMP11]], [[SUM_07]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], half [[TMP11]], half [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi half [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret half [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
+ %0 = load half, ptr %arrayidx, align 4
+ %cmp.i = fcmp olt half %0, %sum.07
+ %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret half %.sroa.speculated
+}
+
+; Input: a scalar loop that loads one bfloat from %a per iteration and keeps
+; the smaller of the loaded value and the running value (which starts at 0.0)
+; via `fcmp olt` + `select`. The function uses attribute group #2, whose
+; definition is outside this excerpt (presumably it enables zvfbfmin, per the
+; function name -- verify against the attribute list).
+; Expected output: the loop is vectorized with one <vscale x 8 x bfloat>
+; accumulator stepping by vscale * 8, the accumulator is folded with
+; llvm.vector.reduce.fmin in the middle block, and the scalar loop is kept as
+; the remainder.
+; NOTE(review): the compare carries no fast-math flags despite the "fast" in
+; the name, and the bfloat loads are marked `align 4` although the pointer is
+; indexed per bfloat element -- confirm both are intentional.
+define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
+; CHECK-LABEL: define bfloat @fmin_fast_bfloat_zvfbfmin(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt bfloat [[TMP11]], [[SUM_07]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], bfloat [[TMP11]], bfloat [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi bfloat [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret bfloat [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
+ %0 = load bfloat, ptr %arrayidx, align 4
+ %cmp.i = fcmp olt bfloat %0, %sum.07
+ %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret bfloat %.sroa.speculated
+}
+
+; FMAX (FAST)
+
+; Input: a scalar loop that loads one float from %a per iteration and keeps the
+; larger of the loaded value and the running value (which starts at 0.0) via
+; `fcmp fast ogt` + `select`. The function uses attribute group #0, whose
+; definition is outside this excerpt.
+; Expected output: the loop is vectorized with one <vscale x 4 x float>
+; accumulator stepping by vscale * 4, the accumulator is folded with a `fast`
+; llvm.vector.reduce.fmax call in the middle block, and the scalar loop is kept
+; as the remainder.
+define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
+; CHECK-LABEL: define float @fmax_fast(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt float [[TMP11]], [[SUM_07]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], float [[TMP11]], float [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi float [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret float [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %cmp.i = fcmp fast ogt float %0, %sum.07
+ %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret float %.sroa.speculated
+}
+
+; Input: a scalar loop that loads one half from %a per iteration and keeps the
+; larger of the loaded value and the running value (which starts at 0.0) via
+; `fcmp fast ogt` + `select`. The function uses attribute group #1, whose
+; definition is outside this excerpt (presumably it enables zvfhmin, per the
+; function name -- verify against the attribute list).
+; Expected output: the loop is vectorized with one <vscale x 8 x half>
+; accumulator stepping by vscale * 8, the accumulator is folded with a `fast`
+; llvm.vector.reduce.fmax call in the middle block, and the scalar loop is kept
+; as the remainder.
+; NOTE(review): the half loads are marked `align 4` although the pointer is
+; indexed per half element -- possibly carried over from the float variant;
+; confirm this is intentional.
+define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
+; CHECK-LABEL: define half @fmax_fast_half_zvfhmin(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call fast half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt half [[TMP11]], [[SUM_07]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], half [[TMP11]], half [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi half [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret half [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
+ %0 = load half, ptr %arrayidx, align 4
+ %cmp.i = fcmp fast ogt half %0, %sum.07
+ %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret half %.sroa.speculated
+}
+
+define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
+; CHECK-LABEL: define bfloat @fmax_fast_bfloat_zvfbfmin(
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16(<vscale x 8 x bfloat> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt bfloat [[TMP11]], [[SUM_07]]
+; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], bfloat [[TMP11]], bfloat [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi bfloat [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret bfloat [[DOTSROA_SPECULATED_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
+ %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
+ %0 = load bfloat, ptr %arrayidx, align 4
+ %cmp.i = fcmp fast ogt bfloat %0, %sum.07
+ %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret bfloat %.sroa.speculated
+}
+
+; Reduction cannot be vectorized with scalable vectors; fixed-length vectors are used instead
+
+; MUL
+
+define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @mul(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP3]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i32> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MUL:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP6]], [[SUM_07]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[MUL_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %mul = mul nsw i32 %0, %sum.07
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 %mul
+}
+
+; Note: This test was added to ensure we always check the legality of reductions before checking for memory dependencies
+define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: define i32 @memory_dependence(
+; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD1]], [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[MUL:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i64 [[I]], 32
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD2]]
+; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP9]], [[SUM]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[MUL_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i
+ %1 = load i32, ptr %arrayidx1, align 4
+ %add = add nsw i32 %1, %0
+ %add2 = add nuw nsw i64 %i, 32
+ %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2
+ store i32 %add, ptr %arrayidx3, align 4
+ %mul = mul nsw i32 %1, %sum
+ %inc = add nuw nsw i64 %i, 1
+ %exitcond.not = icmp eq i64 %inc, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret i32 %mul
+}
+
+define float @fmuladd(ptr %a, ptr %b, i64 %n) {
+; CHECK-LABEL: define float @fmuladd(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD1]], <vscale x 4 x float> [[VEC_PHI]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP11]], float [[TMP12]], float [[SUM_07]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret float [[MULADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
+ %1 = load float, ptr %arrayidx2, align 4
+ %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret float %muladd
+}
+
+define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" {
+; CHECK-LABEL: define half @fmuladd_f16_zvfh(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ insertelement (<vscale x 8 x half> splat (half 0xH8000), half 0xH0000, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x half>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8]] = call reassoc <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[WIDE_LOAD1]], <vscale x 8 x half> [[VEC_PHI]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, <vscale x 8 x half> [[TMP8]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP11]], half [[TMP12]], half [[SUM_07]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret half [[MULADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
+ %0 = load half, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv
+ %1 = load half, ptr %arrayidx2, align 4
+ %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret half %muladd
+}
+
+
+; We can't scalably vectorize fmuladd reductions of f16 with zvfhmin or bf16 with zvfbfmin, so make sure we use fixed-length vectors instead.
+
+define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvfhmin" {
+; CHECK-LABEL: define half @fmuladd_f16_zvfhmin(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ <half 0xH0000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ splat (half 0xH8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 16
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x half>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x half>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]], <16 x half> [[VEC_PHI]])
+; CHECK-NEXT: [[TMP5]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]], <16 x half> [[VEC_PHI1]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load half, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP8]], half [[TMP9]], half [[SUM_07]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret half [[MULADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
+ %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
+ %0 = load half, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv
+ %1 = load half, ptr %arrayidx2, align 4
+ %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret half %muladd
+}
+
+define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin" {
+; CHECK-LABEL: define bfloat @fmuladd_bf16(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ <bfloat 0xR0000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000>, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ splat (bfloat 0xR8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i32 16
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x bfloat>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x bfloat>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]], <16 x bfloat> [[VEC_PHI]])
+; CHECK-NEXT: [[TMP5]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]], <16 x bfloat> [[VEC_PHI1]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP9:%.*]] = load bfloat, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[MULADD]] = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat [[TMP8]], bfloat [[TMP9]], bfloat [[SUM_07]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi bfloat [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret bfloat [[MULADD_LCSSA]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
+ %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
+ %0 = load bfloat, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds bfloat, ptr %b, i64 %iv
+ %1 = load bfloat, ptr %arrayidx2, align 4
+ %muladd = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat %0, bfloat %1, bfloat %sum.07)
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret bfloat %muladd
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfhmin,+zvfhmin"}
+attributes #2 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfbfmin,+zvfbfmin"}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
deleted file mode 100644
index 695a0c3..0000000
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
+++ /dev/null
@@ -1,729 +0,0 @@
-; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on \
-; RUN: -riscv-v-vector-bits-max=128 \
-; RUN: -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \
-; RUN: -pass-remarks-missed=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN: -force-target-max-vector-interleave=2 -mattr=+v,+f -S 2>%t \
-; RUN: | FileCheck %s -check-prefix=CHECK
-; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
-
-; Reduction can be vectorized
-
-; ADD
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @add
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[ADD1:.*]] = add <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[ADD2:.*]] = add <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[ADD:.*]] = add <vscale x 8 x i32> %[[ADD2]], %[[ADD1]]
-; CHECK-NEXT: call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> %[[ADD]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %add = add nsw i32 %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body, %entry
- ret i32 %add
-}
-
-; OR
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @or
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[OR1:.*]] = or <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[OR2:.*]] = or <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[OR:.*]] = or <vscale x 8 x i32> %[[OR2]], %[[OR1]]
-; CHECK-NEXT: call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> %[[OR]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %or = or i32 %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body, %entry
- ret i32 %or
-}
-
-; AND
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @and
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[AND1:.*]] = and <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[AND2:.*]] = and <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[ABD:.*]] = and <vscale x 8 x i32> %[[ADD2]], %[[AND1]]
-; CHECK-NEXT: call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %[[ADD]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %and = and i32 %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body, %entry
- ret i32 %and
-}
-
-; XOR
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @xor
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[XOR1:.*]] = xor <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[XOR2:.*]] = xor <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[XOR:.*]] = xor <vscale x 8 x i32> %[[XOR2]], %[[XOR1]]
-; CHECK-NEXT: call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %[[XOR]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %xor = xor i32 %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body, %entry
- ret i32 %xor
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-; SMIN
-
-define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @smin
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[ICMP1:.*]] = icmp slt <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[ICMP2:.*]] = icmp slt <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]])
-; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[RDX]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %cmp.i = icmp slt i32 %0, %sum.010
- %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret i32 %.sroa.speculated
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-; UMAX
-
-define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @umax
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32>
-; CHECK: %[[ICMP1:.*]] = icmp ugt <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[ICMP2:.*]] = icmp ugt <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]])
-; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[RDX]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %cmp.i = icmp ugt i32 %0, %sum.010
- %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret i32 %.sroa.speculated
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-; FADD (FAST)
-
-define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
-; CHECK-LABEL: @fadd_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
-; CHECK: %[[ADD1:.*]] = fadd fast <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[ADD2:.*]] = fadd fast <vscale x 8 x float> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[ADD:.*]] = fadd fast <vscale x 8 x float> %[[ADD2]], %[[ADD1]]
-; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> %[[ADD]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
- %0 = load float, ptr %arrayidx, align 4
- %add = fadd fast float %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret float %add
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfh" {
-; CHECK-LABEL: @fadd_fast_half_zvfh
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half>
-; CHECK: %[[FADD1:.*]] = fadd fast <vscale x 8 x half> %[[LOAD1]]
-; CHECK: %[[FADD2:.*]] = fadd fast <vscale x 8 x half> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = fadd fast <vscale x 8 x half> %[[FADD2]], %[[FADD1]]
-; CHECK: call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> %[[RDX]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
- %0 = load half, ptr %arrayidx, align 4
- %add = fadd fast half %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret half %add
-}
-
-; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2)
-define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfhmin" {
-; CHECK-LABEL: @fadd_fast_half_zvfhmin
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <16 x half>
-; CHECK: %[[LOAD2:.*]] = load <16 x half>
-; CHECK: %[[FADD1:.*]] = fadd fast <16 x half> %[[LOAD1]]
-; CHECK: %[[FADD2:.*]] = fadd fast <16 x half> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = fadd fast <16 x half> %[[FADD2]], %[[FADD1]]
-; CHECK: call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> %[[RDX]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
- %0 = load half, ptr %arrayidx, align 4
- %add = fadd fast half %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret half %add
-}
-
-; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2)
-define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfbfmin" {
-; CHECK-LABEL: @fadd_fast_bfloat
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <16 x bfloat>
-; CHECK: %[[LOAD2:.*]] = load <16 x bfloat>
-; CHECK: %[[FADD1:.*]] = fadd fast <16 x bfloat> %[[LOAD1]]
-; CHECK: %[[FADD2:.*]] = fadd fast <16 x bfloat> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = fadd fast <16 x bfloat> %[[FADD2]], %[[FADD1]]
-; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> %[[RDX]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
- %0 = load bfloat, ptr %arrayidx, align 4
- %add = fadd fast bfloat %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret bfloat %add
-}
-
-; FMIN (FAST)
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
-; CHECK-LABEL: @fmin_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
-; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
-; CHECK-NEXT: call float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %[[SEL]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
- %0 = load float, ptr %arrayidx, align 4
- %cmp.i = fcmp olt float %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret float %.sroa.speculated
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
-; CHECK-LABEL: @fmin_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half>
-; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x half> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x half> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x half> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x half> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x half> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x half> %[[SEL1]], <vscale x 8 x half> %[[SEL2]]
-; CHECK-NEXT: call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> %[[SEL]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
- %0 = load half, ptr %arrayidx, align 4
- %cmp.i = fcmp olt half %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret half %.sroa.speculated
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
-; CHECK-LABEL: @fmin_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x bfloat>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x bfloat>
-; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x bfloat> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x bfloat> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x bfloat> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x bfloat> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x bfloat> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x bfloat> %[[SEL1]], <vscale x 8 x bfloat> %[[SEL2]]
-; CHECK-NEXT: call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> %[[SEL]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
- %0 = load bfloat, ptr %arrayidx, align 4
- %cmp.i = fcmp olt bfloat %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret bfloat %.sroa.speculated
-}
-
-; FMAX (FAST)
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
-; CHECK-LABEL: @fmax_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
-; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
-; CHECK-NEXT: call fast float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %[[SEL]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
- %0 = load float, ptr %arrayidx, align 4
- %cmp.i = fcmp fast ogt float %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret float %.sroa.speculated
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
-; CHECK-LABEL: @fmax_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half>
-; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x half> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x half> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x half> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x half> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x half> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x half> %[[SEL1]], <vscale x 8 x half> %[[SEL2]]
-; CHECK-NEXT: call fast half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> %[[SEL]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
- %0 = load half, ptr %arrayidx, align 4
- %cmp.i = fcmp fast ogt half %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret half %.sroa.speculated
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
-; CHECK-LABEL: @fmax_fast
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x bfloat>
-; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x bfloat>
-; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[LOAD1]]
-; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x bfloat> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x bfloat> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[SEL1]], %[[SEL2]]
-; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x bfloat> %[[SEL1]], <vscale x 8 x bfloat> %[[SEL2]]
-; CHECK-NEXT: call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16(<vscale x 8 x bfloat> %[[SEL]])
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
- %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
- %0 = load bfloat, ptr %arrayidx, align 4
- %cmp.i = fcmp fast ogt bfloat %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret bfloat %.sroa.speculated
-}
-
-; Reduction cannot be vectorized
-
-; MUL
-
-; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
-define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @mul
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <8 x i32>
-; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD1]]
-; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD2]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]]
-; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]])
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
- %0 = load i32, ptr %arrayidx, align 4
- %mul = mul nsw i32 %0, %sum.07
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body, %entry
- ret i32 %mul
-}
-
-; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies
-; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2)
-define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) {
-; CHECK-LABEL: @memory_dependence
-; CHECK: vector.body:
-; CHECK: %[[LOAD1:.*]] = load <8 x i32>
-; CHECK: %[[LOAD2:.*]] = load <8 x i32>
-; CHECK: %[[LOAD3:.*]] = load <8 x i32>
-; CHECK: %[[LOAD4:.*]] = load <8 x i32>
-; CHECK: %[[ADD1:.*]] = add nsw <8 x i32> %[[LOAD3]], %[[LOAD1]]
-; CHECK: %[[ADD2:.*]] = add nsw <8 x i32> %[[LOAD4]], %[[LOAD2]]
-; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD3]]
-; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD4]]
-; CHECK: middle.block:
-; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]]
-; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]])
-entry:
- br label %for.body
-
-for.body:
- %i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i
- %0 = load i32, ptr %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i
- %1 = load i32, ptr %arrayidx1, align 4
- %add = add nsw i32 %1, %0
- %add2 = add nuw nsw i64 %i, 32
- %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2
- store i32 %add, ptr %arrayidx3, align 4
- %mul = mul nsw i32 %1, %sum
- %inc = add nuw nsw i64 %i, 1
- %exitcond.not = icmp eq i64 %inc, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-for.end:
- ret i32 %mul
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2)
-define float @fmuladd(ptr %a, ptr %b, i64 %n) {
-; CHECK-LABEL: @fmuladd(
-; CHECK: vector.body:
-; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>
-; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>
-; CHECK: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>
-; CHECK: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>
-; CHECK: [[MULADD1:%.*]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD3]],
-; CHECK: [[MULADD2:%.*]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD2]], <vscale x 4 x float> [[WIDE_LOAD4]],
-; CHECK: middle.block:
-; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <vscale x 4 x float> [[MULADD2]], [[MULADD1]]
-; CHECK: call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[BIN_RDX]])
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
- %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
- %0 = load float, ptr %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
- %1 = load float, ptr %arrayidx2, align 4
- %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
-
-for.end:
- ret float %muladd
-}
-
-; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" {
-; CHECK-LABEL: @fmuladd_f16_zvfh(
-; CHECK: vector.body:
-; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>
-; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x half>
-; CHECK: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x half>
-; CHECK: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x half>
-; CHECK: [[MULADD1:%.*]] = call reassoc <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[WIDE_LOAD3]],
-; CHECK: [[MULADD2:%.*]] = call reassoc <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD2]], <vscale x 8 x half> [[WIDE_LOAD4]],
-; CHECK: middle.block:
-; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <vscale x 8 x half> [[MULADD2]], [[MULADD1]]
-; CHECK: call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, <vscale x 8 x half> [[BIN_RDX]])
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
- %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
- %0 = load half, ptr %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv
- %1 = load half, ptr %arrayidx2, align 4
- %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07)
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
-
-for.end:
- ret half %muladd
-}
-
-
-; We can't scalably vectorize reductions of f16 with zvfhmin or bf16 with zvfbfmin, so make sure we use fixed-length vectors instead.
-
-; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2)
-define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvfhmin" {
-; CHECK-LABEL: @fmuladd_f16_zvfhmin(
-; CHECK: vector.body:
-; CHECK: [[WIDE_LOAD:%.*]] = load <16 x half>
-; CHECK: [[WIDE_LOAD2:%.*]] = load <16 x half>
-; CHECK: [[WIDE_LOAD3:%.*]] = load <16 x half>
-; CHECK: [[WIDE_LOAD4:%.*]] = load <16 x half>
-; CHECK: [[MULADD1:%.*]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]],
-; CHECK: [[MULADD2:%.*]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]],
-; CHECK: middle.block:
-; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[MULADD2]], [[MULADD1]]
-; CHECK: call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]])
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
- %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
- %0 = load half, ptr %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv
- %1 = load half, ptr %arrayidx2, align 4
- %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07)
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
-
-for.end:
- ret half %muladd
-}
-
-; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop.
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2)
-define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin" {
-; CHECK-LABEL: @fmuladd_bf16(
-; CHECK: vector.body:
-; CHECK: [[WIDE_LOAD:%.*]] = load <16 x bfloat>
-; CHECK: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>
-; CHECK: [[WIDE_LOAD3:%.*]] = load <16 x bfloat>
-; CHECK: [[WIDE_LOAD4:%.*]] = load <16 x bfloat>
-; CHECK: [[MULADD1:%.*]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]],
-; CHECK: [[MULADD2:%.*]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]],
-; CHECK: middle.block:
-; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[MULADD2]], [[MULADD1]]
-; CHECK: call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]])
-;
-entry:
- br label %for.body
-
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
- %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
- %0 = load bfloat, ptr %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds bfloat, ptr %b, i64 %iv
- %1 = load bfloat, ptr %arrayidx2, align 4
- %muladd = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat %0, bfloat %1, bfloat %sum.07)
- %iv.next = add nuw nsw i64 %iv, 1
- %exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
-
-for.end:
- ret bfloat %muladd
-}
-
-declare float @llvm.fmuladd.f32(float, float, float)
-
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
-attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfhmin,+zvfhmin"}
-attributes #2 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfbfmin,+zvfbfmin"}
-
-!0 = distinct !{!0, !1, !2, !3, !4}
-!1 = !{!"llvm.loop.vectorize.width", i32 8}
-!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
-!3 = !{!"llvm.loop.interleave.count", i32 2}
-!4 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
index 745b8ba..5c6febc 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
@@ -1,60 +1,57 @@
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \
-; RUN: < %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \
-; RUN: -scalable-vectorization=on -S < %s | FileCheck %s -check-prefix=SCALABLE
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s
-target triple = "riscv64"
-
-define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
-; CHECK-LABEL: @select_icmp
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
-; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
-;
-; SCALABLE-LABEL: @select_icmp
-; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
-; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sge <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]]
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
-; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
-; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
+define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) {
+; CHECK-LABEL: define i32 @select_icmp(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr readonly captures(none) [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp sge <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[Y]], i32 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP12]], [[X]]
+; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[COND_LCSSA]]
;
entry:
br label %for.body
@@ -74,56 +71,57 @@ for.end:
ret i32 %cond
}
-define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 {
-; CHECK-LABEL: @select_fcmp
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast uge <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
-; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
-;
-; SCALABLE-LABEL: @select_fcmp
-; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
-; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast uge <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]]
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
-; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0
-; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
+define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) {
+; CHECK-LABEL: define i32 @select_fcmp(
+; CHECK-SAME: float [[X:%.*]], i32 [[Y:%.*]], ptr readonly captures(none) [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast uge <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[Y]], i32 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP12]], [[X]]
+; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[COND_LCSSA]]
;
entry:
br label %for.body
@@ -143,52 +141,55 @@ for.end:
ret i32 %cond
}
-define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
-; CHECK-LABEL: @select_const_i32_from_icmp
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
-; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3
-; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
-;
-; SCALABLE-LABEL: @select_const_i32_from_icmp
-; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
-; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
-; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]]
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
-; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
-; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3
-; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
+define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
+; CHECK-LABEL: define i32 @select_const_i32_from_icmp(
+; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 7, i32 3
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 3
+; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 7
+; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]]
+; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
entry:
br label %for.body
@@ -208,52 +209,55 @@ exit: ; preds = %for.body
ret i32 %5
}
-define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
-; CHECK-LABEL: @select_i32_from_icmp
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
-; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a
-; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
-;
-; SCALABLE-LABEL: @select_i32_from_icmp
-; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
-; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
-; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]]
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
-; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
-; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a
-; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
+define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) {
+; CHECK-LABEL: define i32 @select_i32_from_icmp(
+; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[B]], i32 [[A]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 3
+; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 [[B]]
+; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]]
+; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
entry:
br label %for.body
@@ -273,52 +277,55 @@ exit: ; preds = %for.body
ret i32 %5
}
-define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
-; CHECK-LABEL: @select_const_i32_from_fcmp
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
-; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2
-; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
-;
-; SCALABLE-LABEL: @select_const_i32_from_fcmp
-; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
-; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
-; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast one <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
-; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]]
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
-; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
-; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
-; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2
-; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
+define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) {
+; CHECK-LABEL: define i32 @select_const_i32_from_fcmp(
+; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast one <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 1, i32 2
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast ueq float [[TMP15]], 3.000000e+00
+; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]]
+; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[DOTLCSSA]]
;
entry:
br label %for.body
@@ -338,12 +345,24 @@ exit: ; preds = %for.body
ret i32 %5
}
-define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
-; CHECK-LABEL: @select_const_f32_from_icmp
-; CHECK-NOT: vector.body
-;
-; SCALABLE-LABEL: @select_const_f32_from_icmp
-; SCALABLE-NOT: vector.body
+define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) {
+; CHECK-LABEL: define float @select_const_f32_from_icmp(
+; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP6:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi fast float [ 3.000000e+00, %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 3
+; CHECK-NEXT: [[TMP5]] = select fast i1 [[TMP4]], float [[TMP1]], float 7.000000e+00
+; CHECK-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], [[N]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[TMP5]], %[[FOR_BODY]] ]
+; CHECK-NEXT: ret float [[DOTLCSSA]]
;
entry:
br label %for.body
@@ -363,60 +382,67 @@ exit: ; preds = %for.body
ret float %5
}
-define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 {
-; CHECK-LABEL: @pred_select_const_i32_from_icmp
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 35)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP5]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison)
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2)
-; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]]
-; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) {
+; CHECK-LABEL: define i32 @pred_select_const_i32_from_icmp(
+; CHECK-SAME: ptr noalias readonly captures(none) [[SRC1:%.*]], ptr noalias readonly captures(none) [[SRC2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 35)
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2)
+; CHECK-NEXT: [[TMP10:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]]
+; CHECK-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]])
-; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]]
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0
-; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
-;
-; SCALABLE-LABEL: @pred_select_const_i32_from_icmp
-; SCALABLE: vector.ph:
-; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
-; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]]
-; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
-; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
-; SCALABLE: vector.body:
-; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
-; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 35)
-; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[INDEX]]
-; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison)
-; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2)
-; SCALABLE-NEXT: [[TMP13:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP12]]
-; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> [[VEC_PHI]]
-; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
-; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
-; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]])
-; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]]
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0
-; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec
+; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]])
+; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %[[FOR_INC]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 35
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP15]], 2
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]]
+; CHECK-NEXT: br label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[R_1]] = phi i32 [ [[R_012]], %[[FOR_BODY]] ], [ [[SPEC_SELECT]], %[[IF_THEN]] ]
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[R_1_LCSSA]]
;
entry:
br label %for.body
@@ -446,5 +472,3 @@ for.end.loopexit: ; preds = %for.inc
%r.1.lcssa = phi i32 [ %r.1, %for.inc ]
ret i32 %r.1.lcssa
}
-
-attributes #0 = { "target-features"="+f,+v" }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index 7c74981..90d261b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -250,3 +250,78 @@ loop:
exit:
ret void
}
+
+; Test case for https://github.com/llvm/llvm-project/issues/151686.
+define i8 @multiple_inductions_start_at_0() {
+; CHECK-LABEL: @multiple_inductions_start_at_0(
+; CHECK-NEXT: iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <32 x i8> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <32 x i8> [[STEP_ADD]], zeroinitializer
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <32 x i8> [[STEP_ADD_2]], zeroinitializer
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 128
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[STEP_ADD_3]], zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[STEP_ADD_3]], i32 31
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK: vec.epilog.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK: vec.epilog.ph:
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[BC_RESUME_VAL]] to i8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK: vec.epilog.vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <4 x i8> [[VEC_IND2]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1052
+; CHECK-NEXT: br i1 [[TMP3]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: vec.epilog.middle.block:
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[VEC_IND2]], i32 3
+; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK: vec.epilog.scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ 1052, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ -469762048, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ADD]] = add i32 [[IV_2]], -16777216
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[IV_2]] to i8
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], 1050
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[RES:%.*]] = phi i8 [ [[TRUNC]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i8 [[RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.2 = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %add = add i32 %iv.2, -16777216
+ %trunc = trunc i32 %iv.2 to i8
+ %iv.next = add i32 %iv, 1
+ %ec = icmp ugt i32 %iv, 1050
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ %res = phi i8 [ %trunc, %loop ]
+ ret i8 %res
+}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index e89f41b..97b5210 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -142,40 +142,40 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; AUTO_VEC: for.body.preheader.new:
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640
-; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 4
-; AUTO_VEC-NEXT: [[INVARIANT_GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
-; AUTO_VEC-NEXT: [[INVARIANT_GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12
-; AUTO_VEC-NEXT: [[INVARIANT_GEP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
-; AUTO_VEC-NEXT: [[INVARIANT_GEP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 20
-; AUTO_VEC-NEXT: [[INVARIANT_GEP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 24
-; AUTO_VEC-NEXT: [[INVARIANT_GEP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 28
; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]]
; AUTO_VEC: for.body:
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ 1.000000e+00, [[FOR_BODY_PREHEADER_NEW]] ], [ [[CONV1_7:%.*]], [[FOR_BODY]] ]
; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[CONV1:%.*]] = fadd float [[X_06]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4
; AUTO_VEC-NEXT: store float [[CONV1]], ptr [[ARRAYIDX_1]], align 4
; AUTO_VEC-NEXT: [[CONV1_1:%.*]] = fadd float [[CONV1]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP1]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8
; AUTO_VEC-NEXT: store float [[CONV1_1]], ptr [[ARRAYIDX_2]], align 4
; AUTO_VEC-NEXT: [[CONV1_2:%.*]] = fadd float [[CONV1_1]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP3]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 12
; AUTO_VEC-NEXT: store float [[CONV1_2]], ptr [[ARRAYIDX_3]], align 4
; AUTO_VEC-NEXT: [[CONV1_3:%.*]] = fadd float [[CONV1_2]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP5]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16
; AUTO_VEC-NEXT: store float [[CONV1_3]], ptr [[ARRAYIDX_4]], align 4
; AUTO_VEC-NEXT: [[CONV1_4:%.*]] = fadd float [[CONV1_3]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP7]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 20
; AUTO_VEC-NEXT: store float [[CONV1_4]], ptr [[ARRAYIDX_5]], align 4
; AUTO_VEC-NEXT: [[CONV1_5:%.*]] = fadd float [[CONV1_4]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP9]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 24
; AUTO_VEC-NEXT: store float [[CONV1_5]], ptr [[ARRAYIDX_6]], align 4
; AUTO_VEC-NEXT: [[CONV1_6:%.*]] = fadd float [[CONV1_5]], 5.000000e-01
-; AUTO_VEC-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP11]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; AUTO_VEC-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 28
; AUTO_VEC-NEXT: store float [[CONV1_6]], ptr [[ARRAYIDX_7]], align 4
; AUTO_VEC-NEXT: [[CONV1_7]] = fadd float [[CONV1_6]], 5.000000e-01
; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
@@ -299,40 +299,40 @@ define double @external_use_without_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]]
; AUTO_VEC: entry.new:
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800
-; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 8
-; AUTO_VEC-NEXT: [[INVARIANT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 16
-; AUTO_VEC-NEXT: [[INVARIANT_GEP4:%.*]] = getelementptr i8, ptr [[A]], i64 24
-; AUTO_VEC-NEXT: [[INVARIANT_GEP6:%.*]] = getelementptr i8, ptr [[A]], i64 32
-; AUTO_VEC-NEXT: [[INVARIANT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 40
-; AUTO_VEC-NEXT: [[INVARIANT_GEP10:%.*]] = getelementptr i8, ptr [[A]], i64 48
-; AUTO_VEC-NEXT: [[INVARIANT_GEP12:%.*]] = getelementptr i8, ptr [[A]], i64 56
; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]]
; AUTO_VEC: for.body:
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ]
; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], [[FOR_BODY]] ]
; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ]
-; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[I]]
; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8
; AUTO_VEC-NEXT: [[J_NEXT:%.*]] = fadd double [[J]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_1:%.*]] = getelementptr double, ptr [[INVARIANT_GEP]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 8
; AUTO_VEC-NEXT: store double [[J_NEXT]], ptr [[T0_1]], align 8
; AUTO_VEC-NEXT: [[J_NEXT_1:%.*]] = fadd double [[J_NEXT]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_2:%.*]] = getelementptr double, ptr [[INVARIANT_GEP2]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16
; AUTO_VEC-NEXT: store double [[J_NEXT_1]], ptr [[T0_2]], align 8
; AUTO_VEC-NEXT: [[J_NEXT_2:%.*]] = fadd double [[J_NEXT_1]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_3:%.*]] = getelementptr double, ptr [[INVARIANT_GEP4]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_3:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24
; AUTO_VEC-NEXT: store double [[J_NEXT_2]], ptr [[T0_3]], align 8
; AUTO_VEC-NEXT: [[J_NEXT_3:%.*]] = fadd double [[J_NEXT_2]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_4:%.*]] = getelementptr double, ptr [[INVARIANT_GEP6]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_4:%.*]] = getelementptr i8, ptr [[TMP4]], i64 32
; AUTO_VEC-NEXT: store double [[J_NEXT_3]], ptr [[T0_4]], align 8
; AUTO_VEC-NEXT: [[J_NEXT_4:%.*]] = fadd double [[J_NEXT_3]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_5:%.*]] = getelementptr double, ptr [[INVARIANT_GEP8]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_5:%.*]] = getelementptr i8, ptr [[TMP5]], i64 40
; AUTO_VEC-NEXT: store double [[J_NEXT_4]], ptr [[T0_5]], align 8
; AUTO_VEC-NEXT: [[J_NEXT_5:%.*]] = fadd double [[J_NEXT_4]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_6:%.*]] = getelementptr double, ptr [[INVARIANT_GEP10]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_6:%.*]] = getelementptr i8, ptr [[TMP6]], i64 48
; AUTO_VEC-NEXT: store double [[J_NEXT_5]], ptr [[T0_6]], align 8
; AUTO_VEC-NEXT: [[J_NEXT_6:%.*]] = fadd double [[J_NEXT_5]], 3.000000e+00
-; AUTO_VEC-NEXT: [[T0_7:%.*]] = getelementptr double, ptr [[INVARIANT_GEP12]], i64 [[I]]
+; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
+; AUTO_VEC-NEXT: [[T0_7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 56
; AUTO_VEC-NEXT: store double [[J_NEXT_6]], ptr [[T0_7]], align 8
; AUTO_VEC-NEXT: [[I_NEXT_7]] = add nuw nsw i64 [[I]], 8
; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J_NEXT_6]], 3.000000e+00
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
index e79995f..f329a18f 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll
@@ -7,21 +7,46 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]]
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
+; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[LOOP_END:%.*]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]]
+; CHECK-NEXT: br label [[LOOP_END]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
index b8da9ac..b6a0346 100644
--- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
@@ -62,3 +62,76 @@ loop:
exit:
ret void
}
+
+; Test case for https://github.com/llvm/llvm-project/issues/151392.
+define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {
+; CHECK-LABEL: define void @single_scalar_cast_stored(
+; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 2
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 2
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i16> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[BROADCAST_SPLAT]], splat (i16 15)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]
+; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[SRC]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[L]], 0
+; CHECK-NEXT: [[L_EXT:%.*]] = zext i16 [[L]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[L_EXT]], 15
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[AND]]
+; CHECK-NEXT: [[SEL_TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: store i16 [[SEL_TRUNC]], ptr [[DST]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %l = load i16, ptr %src, align 2
+ %cmp = icmp eq i16 %l, 0
+ %l.ext = zext i16 %l to i32
+ %and = and i32 %l.ext, 15
+ %sel = select i1 %cmp, i32 0, i32 %and
+ %sel.trunc = trunc i32 %sel to i16
+ store i16 %sel.trunc, ptr %dst, align 2
+ %iv.next = add nuw i32 %iv, 1
+ %ec = icmp ne i32 %iv.next, %n
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
index 018e2c21..033907e 100644
--- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
@@ -4,9 +4,9 @@
; We have 3 loops, two of them are vectorizable (with one being early-exit
; vectorized) and the third one is not.
-; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization
-; CHECK: 1 loop-vectorize - Number of early exit loops vectorized
-; CHECK: 2 loop-vectorize - Number of loops vectorized
+; CHECK: 4 loop-vectorize - Number of loops analyzed for vectorization
+; CHECK: 2 loop-vectorize - Number of early exit loops vectorized
+; CHECK: 3 loop-vectorize - Number of loops vectorized
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -15,19 +15,19 @@ entry:
%cmp1 = icmp sle i64 %size, 0
%cmp21 = icmp sgt i64 0, %size
%or.cond = or i1 %cmp1, %cmp21
- br i1 %or.cond, label %for.end, label %for.body
+ br i1 %or.cond, label %exit, label %loop
-for.body: ; preds = %entry, %for.body
- %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv2
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%mul = fmul float %0, %0
store float %mul, ptr %arrayidx, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
- %cmp2 = icmp sgt i64 %indvars.iv.next, %size
- br i1 %cmp2, label %for.end, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %cmp2 = icmp sgt i64 %iv.next, %size
+ br i1 %cmp2, label %exit, label %loop
-for.end: ; preds = %entry, %for.body
+exit: ; preds = %entry, %loop
ret void
}
@@ -38,9 +38,9 @@ entry:
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
%end.clamped = and i64 %end, 1023
- br label %for.body
+ br label %loop
-for.body:
+loop:
%ind = phi i64 [ %ind.next, %for.inc ], [ 0, %entry ]
%arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
%0 = load i32, ptr %arrayidx1, align 4
@@ -52,7 +52,7 @@ for.body:
for.inc:
%ind.next = add i64 %ind, 1
%cmp = icmp ult i64 %ind.next, %end.clamped
- br i1 %cmp, label %for.body, label %exit
+ br i1 %cmp, label %loop, label %exit
found:
ret i32 1
@@ -66,25 +66,50 @@ entry:
%cmp1 = icmp sle i64 %size, 0
%cmp21 = icmp sgt i64 0, %size
%or.cond = or i1 %cmp1, %cmp21
- br i1 %or.cond, label %for.end, label %for.body
+ br i1 %or.cond, label %exit, label %loop
-for.body: ; preds = %entry, %for.body
- %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %0 = add nsw i64 %indvars.iv2, -5
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %0 = add nsw i64 %iv, -5
%arrayidx = getelementptr inbounds float, ptr %a, i64 %0
%1 = load float, ptr %arrayidx, align 4
- %2 = add nsw i64 %indvars.iv2, 2
+ %2 = add nsw i64 %iv, 2
%arrayidx2 = getelementptr inbounds float, ptr %a, i64 %2
%3 = load float, ptr %arrayidx2, align 4
%mul = fmul float %1, %3
- %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv2
+ %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %iv
store float %mul, ptr %arrayidx4, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
- %cmp2 = icmp sgt i64 %indvars.iv.next, %size
- br i1 %cmp2, label %for.end, label %for.body
+ %iv.next = add nuw nsw i64 %iv, 1
+ %cmp2 = icmp sgt i64 %iv.next, %size
+ br i1 %cmp2, label %exit, label %loop
-for.end: ; preds = %entry, %for.body
+exit:
ret void
}
+define i1 @multiple_countable_exits_multiple_exit_block(ptr %A, ptr %B, i32 %N) {
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %cond.0 = icmp eq i32 %iv, %N
+ br i1 %cond.0, label %exit.0, label %loop.latch
+
+loop.latch:
+ %A.gep = getelementptr inbounds i32, ptr %A, i32 %iv
+ %lv = load i32, ptr %A.gep, align 4
+ %B.gep = getelementptr inbounds i32, ptr %B, i32 %iv
+ store i32 %lv, ptr %B.gep, align 4
+ %iv.next = add nuw i32 %iv, 1
+ %cond.1 = icmp ult i32 %iv.next, 1000
+ br i1 %cond.1, label %loop.header, label %exit.1
+
+exit.0:
+ ret i1 false
+
+exit.1:
+ ret i1 true
+}
+
declare void @init_mem(ptr, i64);
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
index 3d44317..e118520 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -329,19 +329,14 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
-; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
-; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
-; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP2]], [[TMP3]]
; VF8UF2-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]])
-; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; VF8UF2-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
-; VF8UF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF8UF2: [[MIDDLE_SPLIT]]:
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
@@ -360,7 +355,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF8UF2: [[LOOP_LATCH]]:
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
-; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
; VF8UF2-NEXT: ret i8 [[RES]]
@@ -372,15 +367,10 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF16UF1: [[VECTOR_PH]]:
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
-; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
-; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
; VF16UF1-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
-; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
-; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; VF16UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
-; VF16UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
; VF16UF1: [[MIDDLE_SPLIT]]:
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
@@ -399,7 +389,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF16UF1: [[LOOP_LATCH]]:
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
-; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
; VF16UF1-NEXT: ret i8 [[RES]]
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll b/llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll
new file mode 100644
index 0000000..b681ecdc
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll
@@ -0,0 +1,1103 @@
+;; Test for iterative node merging. This is an llvm-reduced version of the xalancbmk
+;; benchmark with FullLTO and memprof.
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new -stats \
+; RUN: -memprof-merge-iteration=false %s -S 2>&1 | FileCheck %s --check-prefix=NOITER
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new -stats \
+; RUN: -memprof-merge-iteration=true %s -S 2>&1 | FileCheck %s --check-prefix=ITER
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new -stats \
+; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=ITER
+
+; NOITER-NOT: _ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb.memprof.2
+; NOITER: 7 memprof-context-disambiguation - Number of function clones created during whole program analysis
+; NOITER: 1 memprof-context-disambiguation - Max merge iterations for nodes
+; NOITER: 2 memprof-context-disambiguation - Number of new nodes created during merging
+
+; ITER: _ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb.memprof.2
+; ITER: 8 memprof-context-disambiguation - Number of function clones created during whole program analysis
+; ITER: 3 memprof-context-disambiguation - Max merge iterations for nodes
+; ITER: 3 memprof-context-disambiguation - Number of new nodes created during merging
+
+; ModuleID = 'reduced.bc'
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%"class.xercesc_2_5::XMLNumber" = type { %"class.xercesc_2_5::XMLEnumerator" }
+%"class.xercesc_2_5::XMLEnumerator" = type { ptr }
+
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_XPath.cpp, ptr null }]
+@_ZTVN10xalanc_1_822FunctionNormalizeSpaceE = constant { [11 x ptr] } { [11 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr null, ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeEPKN11xercesc_2_57LocatorE, ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeENS_10XObjectPtrEPKN11xercesc_2_57LocatorE, ptr null, ptr null, ptr null, ptr null] }
+@_ZTVN10__cxxabiv121__vmi_class_type_infoE = constant { [10 x ptr] } zeroinitializer
+@_ZTVN10__cxxabiv119__pointer_type_infoE = constant { [7 x ptr] } zeroinitializer
+@_ZTVSt13bad_exception = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt13bad_exception, ptr @_ZNSt13bad_exceptionD1Ev, ptr null, ptr null] }
+@_ZTISt13bad_exception = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception }
+@_ZTISt9bad_alloc = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception }
+@_ZTVSt8bad_cast = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt8bad_cast, ptr @_ZNSt8bad_castD1Ev, ptr null, ptr null] }
+@_ZTVSt10bad_typeid = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt10bad_typeid, ptr @_ZNSt10bad_typeidD1Ev, ptr null, ptr null] }
+@_ZTVN10__cxxabiv117__class_type_infoE = constant { [10 x ptr] } zeroinitializer
+@_ZTISt8bad_cast = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception }
+@_ZTVN10__cxxabiv120__si_class_type_infoE = constant { [10 x ptr] } zeroinitializer
+@_ZTISt9exception = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr null }
+@_ZTISt10bad_typeid = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception }
+@_ZTVSt9exception = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt9exception, ptr @_ZNSt9exceptionD2Ev, ptr null, ptr null] }
+
+@_ZN10xalanc_1_814XalanDOMStringC1EPKcj = alias void (ptr, ptr, i32), ptr @_ZN10xalanc_1_814XalanDOMStringC2EPKcj
+@_Znwm = alias ptr (i64), ptr @TCMallocInternalNew
+@_ZdlPvm = alias void (ptr, i64), ptr @TCMallocInternalDeleteSized
+@_Znam = alias ptr (i64), ptr @TCMallocInternalNew
+@_ZdaPv = alias void (ptr), ptr @TCMallocInternalDelete
+@_ZdlPv = alias void (ptr), ptr @TCMallocInternalDelete
+@_ZnwmRKSt9nothrow_t = alias ptr (i64, ptr), ptr @TCMallocInternalNewNothrow
+@_ZnamRKSt9nothrow_t = alias ptr (i64, ptr), ptr @TCMallocInternalNewNothrow
+@_ZdlPvRKSt9nothrow_t = alias void (ptr, ptr), ptr @TCMallocInternalDelete
+@_ZdaPvRKSt9nothrow_t = alias void (ptr, ptr), ptr @TCMallocInternalDelete
+@_ZnwmSt11align_val_t = alias ptr (i64, i64), ptr @TCMallocInternalNewAligned
+@_ZnwmSt11align_val_tRKSt9nothrow_t = alias ptr (i64, i64, ptr), ptr @TCMallocInternalNewAlignedNothrow
+@_ZdlPvSt11align_val_t = alias void (ptr, i64), ptr @TCMallocInternalDelete
+@_ZdlPvSt11align_val_tRKSt9nothrow_t = alias void (ptr, i64, ptr), ptr @TCMallocInternalDelete
+@_ZdlPvmSt11align_val_t = alias void (ptr, i64, i64), ptr @TCMallocInternalDeleteSizedAligned
+@_ZnamSt11align_val_t = alias ptr (i64, i64), ptr @TCMallocInternalNewAligned
+@_ZnamSt11align_val_tRKSt9nothrow_t = alias ptr (i64, i64, ptr), ptr @TCMallocInternalNewAlignedNothrow
+@_ZdaPvSt11align_val_t = alias void (ptr, i64), ptr @TCMallocInternalDelete
+@_ZdaPvSt11align_val_tRKSt9nothrow_t = alias void (ptr, i64, ptr), ptr @TCMallocInternalDelete
+@_ZdaPvmSt11align_val_t = alias void (ptr, i64, i64), ptr @TCMallocInternalDeleteSizedAligned
+@_ZNSt13exception_ptrD1Ev = alias void (ptr), ptr @_ZNSt13exception_ptrD2Ev
+@_ZNSt13exception_ptrC1ERKS_ = alias void (ptr, ptr), ptr @_ZNSt13exception_ptrC2ERKS_
+@_ZNSt13bad_exceptionD1Ev = alias void (ptr), ptr @_ZNSt9exceptionD2Ev
+@_ZNSt8bad_castD1Ev = alias void (ptr), ptr @_ZNSt8bad_castD2Ev
+@_ZNSt10bad_typeidD1Ev = alias void (ptr), ptr @_ZNSt10bad_typeidD2Ev
+
+define ptr @_ZNSt3__u6vectorItNS_9allocatorItEEE7reserveEm() {
+ %1 = tail call ptr @_Znwm(i64 0), !memprof !29, !callsite !592
+ ret ptr %1
+}
+
+; Function Attrs: cold
+declare void @_ZN10xalanc_1_88FunctionC2Ev() #0
+
+define void @_ZN10xalanc_1_812FunctionLangC2Ev() {
+ call void @_ZN10xalanc_1_88FunctionC2Ev()
+ call void @_ZN10xalanc_1_814XalanDOMStringC1EPKcj(ptr null, ptr null, i32 0), !callsite !593
+ ret void
+}
+
+define void @_ZN10xalanc_1_822FunctionNormalizeSpaceC2Ev(ptr %0) {
+ store ptr @_ZTVN10xalanc_1_822FunctionNormalizeSpaceE, ptr %0, align 8
+ ret void
+}
+
+define void @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeEPKN11xercesc_2_57LocatorE() {
+ call void @_ZN10xalanc_1_818XalanMessageLoader10getMessageENS_13XalanMessages5CodesEPKcS4_S4_S4_()
+ ret void
+}
+
+define ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeENS_10XObjectPtrEPKN11xercesc_2_57LocatorE() {
+ %1 = call ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_10XObjectPtrE()
+ ret ptr %1
+}
+
+define ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_10XObjectPtrE() {
+ %1 = load ptr, ptr null, align 8
+ %2 = getelementptr i8, ptr %1, i64 72
+ %3 = load ptr, ptr %2, align 8
+ %4 = tail call ptr %3(ptr null)
+ %5 = call ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_14XalanDOMStringE()
+ ret ptr %5
+}
+
+define ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_14XalanDOMStringE() {
+ %1 = call ptr @_ZNSt3__u6vectorItNS_9allocatorItEEE7reserveEm()
+ ret ptr %1
+}
+
+declare i64 @mbstowcs()
+
+define void @_GLOBAL__sub_I_XPath.cpp() {
+ tail call void @_ZN10xalanc_1_818XPathFunctionTableC2Eb()
+ ret void
+}
+
+define void @_ZN10xalanc_1_818XPathFunctionTableC2Eb() {
+ call void @_ZN10xalanc_1_818XPathFunctionTable11CreateTableEv()
+ ret void
+}
+
+define void @_ZN10xalanc_1_818XPathFunctionTable11CreateTableEv() {
+ %1 = alloca %"class.xercesc_2_5::XMLNumber", align 8
+ call void @_ZN10xalanc_1_812FunctionLangC2Ev()
+ call void @_ZN10xalanc_1_822FunctionNormalizeSpaceC2Ev(ptr %1)
+ ret void
+}
+
+define void @_ZN10xalanc_1_814XalanDOMStringC2EPKcj(ptr %0, ptr %1, i32 %2) #1 {
+ %4 = call ptr @_ZN10xalanc_1_814XalanDOMString6appendEPKcj(ptr %0, ptr %1, i32 %2), !callsite !594
+ ret void
+}
+
+; Function Attrs: cold
+define ptr @_ZN10xalanc_1_814XalanDOMString6appendEPKcj(ptr %0, ptr %1, i32 %2) #0 {
+ %4 = load i32, ptr %0, align 8
+ %5 = icmp eq i32 %4, 0
+ br i1 %5, label %common.ret, label %6
+
+common.ret: ; preds = %3
+ tail call fastcc void @_ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %1, i32 %2, ptr %0, i1 true), !callsite !595
+ ret ptr %0
+
+6: ; preds = %3
+ call fastcc void @_ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr null, i32 1, ptr null, i1 false)
+ unreachable
+}
+
+define fastcc void @_ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, i32 %1, ptr %2, i1 %3) !prof !596 {
+ %5 = icmp eq i32 %1, 1
+ br i1 %5, label %6, label %9
+
+6: ; preds = %4
+ %7 = call fastcc i1 @_ZN10xalanc_1_8L28doTranscodeFromLocalCodePageEPKcjbRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, ptr %2, i1 %3)
+ br i1 %7, label %11, label %8
+
+8: ; preds = %6
+ ret void
+
+9: ; preds = %4
+ %10 = call fastcc i1 @_ZN10xalanc_1_8L28doTranscodeFromLocalCodePageEPKcjbRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, ptr null, i1 false), !callsite !597
+ br label %11
+
+11: ; preds = %9, %6
+ ret void
+}
+
+define fastcc i1 @_ZN10xalanc_1_8L28doTranscodeFromLocalCodePageEPKcjbRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, ptr %1, i1 %2) {
+ %4 = icmp eq ptr %0, null
+ br i1 %4, label %5, label %7
+
+5: ; preds = %3
+ %6 = load i64, ptr %1, align 8
+ %cond = icmp eq i64 %6, 0
+ ret i1 %cond
+
+7: ; preds = %3
+ %8 = call i64 @mbstowcs()
+ %9 = zext i1 %2 to i64
+ call void @_ZNSt3__u6vectorIwNS_9allocatorIwEEE8__appendEm(), !callsite !598
+ ret i1 false
+}
+
+define void @_ZNSt3__u6vectorIwNS_9allocatorIwEEE8__appendEm() {
+ %1 = tail call ptr @_Znwm(i64 0), !memprof !599, !callsite !768
+ ret void
+}
+
+; Function Attrs: cold
+define void @_ZN10xalanc_1_826XalanInMemoryMessageLoaderC2Ev() #0 {
+ call void @_ZN10xalanc_1_814XalanDOMStringC1EPKcj(ptr null, ptr null, i32 0), !callsite !769
+ ret void
+}
+
+define void @_ZN10xalanc_1_818XalanMessageLoader12createLoaderEv() {
+ %1 = tail call ptr @_Znwm(i64 0)
+ call void @_ZN10xalanc_1_826XalanInMemoryMessageLoaderC2Ev(), !callsite !770
+ ret void
+}
+
+define void @_ZN10xalanc_1_818XalanMessageLoader10getMessageENS_13XalanMessages5CodesEPKcS4_S4_S4_() {
+ tail call void @_ZN10xalanc_1_818XalanMessageLoader12createLoaderEv()
+ ret void
+}
+
+define void @TCMallocInternalDeleteSized() {
+ ret void
+}
+
+; Function Attrs: nobuiltin noinline
+define ptr @TCMallocInternalNew(i64 %0) #2 {
+ ret ptr null
+}
+
+define void @TCMallocInternalDelete() {
+ ret void
+}
+
+define i64 @TCMallocInternalNewNothrow() {
+ ret i64 0
+}
+
+define i64 @TCMallocInternalNewAligned() {
+ ret i64 0
+}
+
+define i64 @TCMallocInternalNewAlignedNothrow() {
+ ret i64 0
+}
+
+define void @TCMallocInternalDeleteSizedAligned() {
+ ret void
+}
+
+define i1 @_ZSt18uncaught_exceptionv() {
+ ret i1 false
+}
+
+define void @_ZNSt13exception_ptrD2Ev() {
+ ret void
+}
+
+define void @_ZNSt13exception_ptrC2ERKS_() {
+ ret void
+}
+
+define ptr @_ZNSt13exception_ptraSERKS_() {
+ ret ptr null
+}
+
+define void @_ZSt17rethrow_exceptionSt13exception_ptr() {
+ unreachable
+}
+
+define void @_ZSt17__throw_bad_allocv() {
+ unreachable
+}
+
+define void @__cxa_bad_cast() {
+ unreachable
+}
+
+define ptr @__cxa_allocate_exception() {
+ ret ptr null
+}
+
+define ptr @__cxa_begin_catch() {
+ ret ptr null
+}
+
+define void @__cxa_free_exception() {
+ ret void
+}
+
+define void @__cxa_throw() {
+ unreachable
+}
+
+define void @__cxa_end_catch() {
+ ret void
+}
+
+define ptr @__cxa_current_exception_type() {
+ ret ptr null
+}
+
+define void @__cxa_rethrow() {
+ ret void
+}
+
+define void @_ZSt9terminatev() {
+ ret void
+}
+
+define i32 @__gxx_personality_v0() {
+ ret i32 0
+}
+
+define void @__cxa_call_unexpected() {
+ ret void
+}
+
+define ptr @__dynamic_cast() {
+ ret ptr null
+}
+
+define void @_ZNSt9exceptionD2Ev() {
+ ret void
+}
+
+define void @_ZNSt8bad_castD2Ev() {
+ ret void
+}
+
+define void @_ZNSt10bad_typeidD2Ev() {
+ ret void
+}
+
+attributes #0 = { cold }
+attributes #1 = { "target-features"="+aes" }
+attributes #2 = { nobuiltin noinline }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 331263925478}
+!4 = !{!"MaxCount", i64 89521949747}
+!5 = !{!"MaxInternalCount", i64 89521949747}
+!6 = !{!"MaxFunctionCount", i64 14842374247}
+!7 = !{!"NumCounts", i64 80529}
+!8 = !{!"NumFunctions", i64 13237}
+!9 = !{!"IsPartialProfile", i64 0}
+!10 = !{!"PartialProfileRatio", double 0.000000e+00}
+!11 = !{!"DetailedSummary", !12}
+!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28}
+!13 = !{i32 10000, i64 89521949747, i32 1}
+!14 = !{i32 100000, i64 89521949747, i32 1}
+!15 = !{i32 200000, i64 89521949747, i32 1}
+!16 = !{i32 300000, i64 89454229684, i32 2}
+!17 = !{i32 400000, i64 89454229684, i32 2}
+!18 = !{i32 500000, i64 89454229684, i32 2}
+!19 = !{i32 600000, i64 28686354153, i32 3}
+!20 = !{i32 700000, i64 12169900676, i32 5}
+!21 = !{i32 800000, i64 2585869019, i32 9}
+!22 = !{i32 900000, i64 1189366531, i32 32}
+!23 = !{i32 950000, i64 137116556, i32 82}
+!24 = !{i32 990000, i64 24641624, i32 286}
+!25 = !{i32 999000, i64 832911, i32 881}
+!26 = !{i32 999900, i64 110792, i32 1739}
+!27 = !{i32 999990, i64 20910, i32 2245}
+!28 = !{i32 999999, i64 650, i32 2817}
+!29 = !{!30, !32, !34, !36, !38, !40, !42, !44, !46, !48, !50, !52, !54, !56, !58, !60, !62, !64, !66, !68, !70, !72, !74, !76, !78, !80, !82, !84, !86, !88, !90, !92, !94, !96, !98, !100, !102, !104, !106, !108, !110, !112, !114, !116, !118, !120, !122, !124, !126, !128, !130, !132, !134, !136, !138, !140, !142, !144, !146, !148, !150, !152, !154, !156, !158, !160, !162, !164, !166, !168, !170, !172, !174, !176, !178, !180, !182, !184, !186, !188, !190, !192, !194, !196, !198, !200, !202, !204, !206, !208, !210, !212, !214, !216, !218, !220, !222, !224, !226, !228, !230, !232, !234, !236, !238, !240, !242, !244, !246, !248, !250, !252, !254, !256, !258, !260, !262, !264, !266, !268, !270, !272, !274, !276, !278, !280, !282, !284, !286, !288, !290, !292, !294, !296, !298, !300, !302, !304, !306, !308, !310, !312, !314, !316, !318, !320, !322, !324, !326, !328, !330, !332, !334, !336, !338, !340, !342, !344, !346, !348, !350, !352, !354, !356, !358, !360, !362, !364, !366, !368, !370, !372, !374, !376, !378, !380, !382, !384, !386, !388, !390, !392, !394, !396, !398, !400, !402, !404, !406, !408, !410, !412, !414, !416, !418, !420, !422, !424, !426, !428, !430, !432, !434, !436, !438, !440, !442, !444, !446, !448, !450, !452, !454, !456, !458, !460, !462, !464, !466, !468, !470, !472, !474, !476, !478, !480, !482, !484, !486, !488, !490, !492, !494, !496, !498, !500, !502, !504, !506, !508, !510, !512, !514, !516, !518, !520, !522, !524, !526, !528, !530, !532, !534, !536, !538, !540, !542, !544, !546, !548, !550, !552, !554, !556, !558, !560, !562, !564, !566, !568, !570, !572, !574, !576, !578, !580, !582, !584, !586, !588, !590}
+!30 = !{!31, !"cold"}
+!31 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 6029234927090217085}
+!32 = !{!33, !"notcold"}
+!33 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -8053384441926065153, i64 -2506951929422432416, i64 -8699079470974299286, i64 -4247704025117397876, i64 7919823637914964443, i64 2053928606242451379, i64 -5479684209450392625, i64 4312698517630782220, i64 5379466077518675850}
+!34 = !{!35, !"cold"}
+!35 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -8053384441926065153, i64 -2506951929422432416, i64 -8699079470974299286, i64 -4247704025117397876, i64 7919823637914964443, i64 2053928606242451379, i64 -5479684209450392625, i64 4312698517630782220, i64 -1805555115991223293}
+!36 = !{!37, !"cold"}
+!37 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -8053384441926065153, i64 -2506951929422432416, i64 -8699079470974299286, i64 -2032085148702428395}
+!38 = !{!39, !"cold"}
+!39 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640}
+!40 = !{!41, !"cold"}
+!41 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640}
+!42 = !{!43, !"notcold"}
+!43 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640}
+!44 = !{!45, !"cold"}
+!45 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 8647084438977525618}
+!46 = !{!47, !"cold"}
+!47 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 6798683962384280640}
+!48 = !{!49, !"cold"}
+!49 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 8647084438977525618, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400}
+!50 = !{!51, !"cold"}
+!51 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 8647084438977525618, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640}
+!52 = !{!53, !"cold"}
+!53 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 8647084438977525618, i64 -1079892354093417124, i64 6798683962384280640}
+!54 = !{!55, !"cold"}
+!55 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400}
+!56 = !{!57, !"notcold"}
+!57 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640}
+!58 = !{!59, !"cold"}
+!59 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640}
+!60 = !{!61, !"cold"}
+!61 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 -6416746930642965881, i64 -1079892354093417124, i64 -1079892354093417124, i64 6798683962384280640}
+!62 = !{!63, !"notcold"}
+!63 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 -6416746930642965881, i64 -1079892354093417124, i64 -1079892354093417124, i64 -1079892354093417124}
+!64 = !{!65, !"cold"}
+!65 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 -1079892354093417124}
+!66 = !{!67, !"cold"}
+!67 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!68 = !{!69, !"cold"}
+!69 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!70 = !{!71, !"cold"}
+!71 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!72 = !{!73, !"cold"}
+!73 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!74 = !{!75, !"cold"}
+!75 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!76 = !{!77, !"cold"}
+!77 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!78 = !{!79, !"cold"}
+!79 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!80 = !{!81, !"cold"}
+!81 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!82 = !{!83, !"cold"}
+!83 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!84 = !{!85, !"cold"}
+!85 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!86 = !{!87, !"cold"}
+!87 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!88 = !{!89, !"cold"}
+!89 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!90 = !{!91, !"cold"}
+!91 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!92 = !{!93, !"cold"}
+!93 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!94 = !{!95, !"cold"}
+!95 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!96 = !{!97, !"cold"}
+!97 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!98 = !{!99, !"cold"}
+!99 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!100 = !{!101, !"cold"}
+!101 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!102 = !{!103, !"cold"}
+!103 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!104 = !{!105, !"cold"}
+!105 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!106 = !{!107, !"cold"}
+!107 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!108 = !{!109, !"cold"}
+!109 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!110 = !{!111, !"cold"}
+!111 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!112 = !{!113, !"cold"}
+!113 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!114 = !{!115, !"cold"}
+!115 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!116 = !{!117, !"cold"}
+!117 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!118 = !{!119, !"cold"}
+!119 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!120 = !{!121, !"cold"}
+!121 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!122 = !{!123, !"cold"}
+!123 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350}
+!124 = !{!125, !"cold"}
+!125 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 703544601638968040}
+!126 = !{!127, !"cold"}
+!127 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 3608551295493976071}
+!128 = !{!129, !"cold"}
+!129 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 4358851737059049324}
+!130 = !{!131, !"cold"}
+!131 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 5183932555433262029}
+!132 = !{!133, !"notcold"}
+!133 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7543168268883704429, i64 3717163490267262493}
+!134 = !{!135, !"cold"}
+!135 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7543168268883704429, i64 6956871998514654501}
+!136 = !{!137, !"cold"}
+!137 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7622038321347520920}
+!138 = !{!139, !"cold"}
+!139 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7923736392933351814}
+!140 = !{!141, !"cold"}
+!141 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 8786684249512079407}
+!142 = !{!143, !"cold"}
+!143 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 5227238232120559200}
+!144 = !{!145, !"notcold"}
+!145 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 6440225172503051565}
+!146 = !{!147, !"cold"}
+!147 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -8676112728238134370}
+!148 = !{!149, !"cold"}
+!149 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -4578348703977238664}
+!150 = !{!151, !"cold"}
+!151 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -3818321239655607597}
+!152 = !{!153, !"cold"}
+!153 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -380569784870776951}
+!154 = !{!155, !"cold"}
+!155 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 5227238232120559200}
+!156 = !{!157, !"cold"}
+!157 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 6440225172503051565}
+!158 = !{!159, !"notcold"}
+!159 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -8676112728238134370}
+!160 = !{!161, !"cold"}
+!161 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -3818321239655607597}
+!162 = !{!163, !"cold"}
+!163 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 5227238232120559200}
+!164 = !{!165, !"cold"}
+!165 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 6440225172503051565}
+!166 = !{!167, !"notcold"}
+!167 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 -8676112728238134370}
+!168 = !{!169, !"cold"}
+!169 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 -4578348703977238664}
+!170 = !{!171, !"cold"}
+!171 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6599978295092623218}
+!172 = !{!173, !"cold"}
+!173 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6580674910425543918}
+!174 = !{!175, !"cold"}
+!175 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6003516630083934328}
+!176 = !{!177, !"cold"}
+!177 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -725604301669741756}
+!178 = !{!179, !"cold"}
+!179 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -528103690324761532, i64 -7038881423600967439, i64 1457427447070478076, i64 -6391461151347487032, i64 -7188595821817891258, i64 -5089627249000618877, i64 1595244419265827813, i64 -5230206178376217032, i64 9110891150749782890, i64 1047178367295653512, i64 7589111283956938786}
+!180 = !{!181, !"cold"}
+!181 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -528103690324761532, i64 -7038881423600967439, i64 1457427447070478076, i64 -6391461151347487032, i64 -7188595821817891258, i64 -5089627249000618877, i64 1595244419265827813, i64 -5230206178376217032, i64 9110891150749782890, i64 1664659532151860180, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 -8044437084415731706}
+!182 = !{!183, !"notcold"}
+!183 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -528103690324761532, i64 -7038881423600967439, i64 1457427447070478076, i64 -6391461151347487032, i64 -7188595821817891258, i64 -5089627249000618877, i64 1595244419265827813, i64 -5230206178376217032, i64 9110891150749782890, i64 1664659532151860180, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 -4180453512466417651}
+!184 = !{!185, !"notcold"}
+!185 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -6616032301308523050, i64 1047178367295653512}
+!186 = !{!187, !"cold"}
+!187 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -6616032301308523050, i64 1664659532151860180}
+!188 = !{!189, !"cold"}
+!189 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -4220077066789644587, i64 7630274881348409761, i64 2314936267722354705}
+!190 = !{!191, !"cold"}
+!191 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -4220077066789644587, i64 7630274881348409761, i64 4570902236181266557}
+!192 = !{!193, !"notcold"}
+!193 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -4220077066789644587, i64 7630274881348409761, i64 -5242964645367079043}
+!194 = !{!195, !"cold"}
+!195 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -364806006548699073}
+!196 = !{!197, !"cold"}
+!197 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 4237282366048505309}
+!198 = !{!199, !"cold"}
+!199 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!200 = !{!201, !"cold"}
+!201 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!202 = !{!203, !"cold"}
+!203 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!204 = !{!205, !"cold"}
+!205 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!206 = !{!207, !"cold"}
+!207 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!208 = !{!209, !"cold"}
+!209 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!210 = !{!211, !"cold"}
+!211 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!212 = !{!213, !"cold"}
+!213 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!214 = !{!215, !"cold"}
+!215 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!216 = !{!217, !"cold"}
+!217 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!218 = !{!219, !"cold"}
+!219 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!220 = !{!221, !"cold"}
+!221 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!222 = !{!223, !"cold"}
+!223 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!224 = !{!225, !"cold"}
+!225 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!226 = !{!227, !"cold"}
+!227 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!228 = !{!229, !"notcold"}
+!229 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350}
+!230 = !{!231, !"cold"}
+!231 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!232 = !{!233, !"notcold"}
+!233 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 
4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148}
+!234 = !{!235, !"cold"}
+!235 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 
4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696}
+!236 = !{!237, !"cold"}
+!237 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!238 = !{!239, !"cold"}
+!239 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!240 = !{!241, !"cold"}
+!241 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!242 = !{!243, !"cold"}
+!243 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!244 = !{!245, !"cold"}
+!245 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!246 = !{!247, !"cold"}
+!247 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!248 = !{!249, !"cold"}
+!249 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!250 = !{!251, !"cold"}
+!251 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!252 = !{!253, !"cold"}
+!253 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!254 = !{!255, !"cold"}
+!255 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!256 = !{!257, !"cold"}
+!257 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!258 = !{!259, !"cold"}
+!259 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!260 = !{!261, !"cold"}
+!261 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!262 = !{!263, !"cold"}
+!263 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!264 = !{!265, !"cold"}
+!265 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!266 = !{!267, !"cold"}
+!267 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!268 = !{!269, !"cold"}
+!269 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!270 = !{!271, !"cold"}
+!271 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!272 = !{!273, !"cold"}
+!273 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!274 = !{!275, !"cold"}
+!275 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!276 = !{!277, !"cold"}
+!277 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!278 = !{!279, !"notcold"}
+!279 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350}
+!280 = !{!281, !"cold"}
+!281 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -2230507569891602201}
+!282 = !{!283, !"cold"}
+!283 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 2654783664530695739, i64 -8629883790003563202, i64 -923725743350185774, i64 2929907956083581648, i64 -8991518728647676517}
+!284 = !{!285, !"cold"}
+!285 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 2654783664530695739, i64 -8629883790003563202, i64 -923725743350185774, i64 2929907956083581648, i64 -7241526169985634471}
+!286 = !{!287, !"notcold"}
+!287 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 2654783664530695739, i64 -8629883790003563202, i64 -923725743350185774, i64 2929907956083581648, i64 -2070608964811380293}
+!288 = !{!289, !"notcold"}
+!289 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 3898931366823636439}
+!290 = !{!291, !"cold"}
+!291 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 6718910740018369967}
+!292 = !{!293, !"cold"}
+!293 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 8934449472348526968}
+!294 = !{!295, !"cold"}
+!295 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 -1796035955371760228}
+!296 = !{!297, !"notcold"}
+!297 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7768164617256106301, i64 5227238232120559200}
+!298 = !{!299, !"cold"}
+!299 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7768164617256106301, i64 -8676112728238134370}
+!300 = !{!301, !"cold"}
+!301 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7768164617256106301, i64 -5097561514079731295}
+!302 = !{!303, !"notcold"}
+!303 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7845428890017607803, i64 3791151357907954961, i64 -4282486660539596674, i64 2873405805561972602, i64 3863349475384737601, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550}
+!304 = !{!305, !"cold"}
+!305 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7845428890017607803, i64 3791151357907954961, i64 -4282486660539596674, i64 2873405805561972602, i64 3863349475384737601, i64 2509678254863108051, i64 927100692576121148, i64 -642697777073794173}
+!306 = !{!307, !"cold"}
+!307 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 -6175606193227481616}
+!308 = !{!309, !"cold"}
+!309 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 -2159124216309770955}
+!310 = !{!311, !"notcold"}
+!311 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 6452974616915078953, i64 3430064890274741042}
+!312 = !{!313, !"cold"}
+!313 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 6452974616915078953, i64 -7269278260587173790}
+!314 = !{!315, !"cold"}
+!315 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 6452974616915078953, i64 -4180453512466417651}
+!316 = !{!317, !"cold"}
+!317 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 -1861972594052659404}
+!318 = !{!319, !"cold"}
+!319 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 5390968037314166465}
+!320 = !{!321, !"cold"}
+!321 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 7144453710359912936}
+!322 = !{!323, !"cold"}
+!323 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -7099026173386863432}
+!324 = !{!325, !"cold"}
+!325 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -5247604975316055689}
+!326 = !{!327, !"cold"}
+!327 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -4068062742094437340}
+!328 = !{!329, !"cold"}
+!329 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3834757517663654734, i64 5227238232120559200}
+!330 = !{!331, !"cold"}
+!331 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3834757517663654734, i64 -4578348703977238664}
+!332 = !{!333, !"cold"}
+!333 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3834757517663654734, i64 -3818321239655607597}
+!334 = !{!335, !"cold"}
+!335 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 2786152373261539699}
+!336 = !{!337, !"cold"}
+!337 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 4901895946974703269}
+!338 = !{!339, !"cold"}
+!339 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -5529974889548515387}
+!340 = !{!341, !"cold"}
+!341 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 1236134833646061117}
+!342 = !{!343, !"cold"}
+!343 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042}
+!344 = !{!345, !"cold"}
+!345 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 -8044437084415731706}
+!346 = !{!347, !"cold"}
+!347 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 7589111283956938786, i64 -4015860425916992153, i64 -6574691243059686550, i64 -865315721850786456, i64 -4343166111045357943, i64 -9170346855404441874, i64 766214442491134059}
+!348 = !{!349, !"cold"}
+!349 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 7589111283956938786, i64 -4015860425916992153, i64 -6574691243059686550, i64 -865315721850786456, i64 -4343166111045357943, i64 -9170346855404441874, i64 -8321193460161544571}
+!350 = !{!351, !"cold"}
+!351 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 1249779207984631657, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042}
+!352 = !{!353, !"cold"}
+!353 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 1249779207984631657, i64 7589111283956938786}
+!354 = !{!355, !"cold"}
+!355 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 2763817770009354222, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042}
+!356 = !{!357, !"cold"}
+!357 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 3213891655075030301}
+!358 = !{!359, !"cold"}
+!359 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 -9039914273107794752, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042}
+!360 = !{!361, !"cold"}
+!361 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 -5356510825827839560, i64 1664659532151860180, i64 5180037934435350253}
+!362 = !{!363, !"cold"}
+!363 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 -3801963940567328533}
+!364 = !{!365, !"cold"}
+!365 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -1190723991431075531}
+!366 = !{!367, !"cold"}
+!367 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 5375458305822857010, i64 296143722720000868}
+!368 = !{!369, !"cold"}
+!369 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 7093742121589842512, i64 -5755277970707044392}
+!370 = !{!371, !"cold"}
+!371 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 8652501918564778285, i64 -5755277970707044392}
+!372 = !{!373, !"cold"}
+!373 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!374 = !{!375, !"cold"}
+!375 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!376 = !{!377, !"cold"}
+!377 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!378 = !{!379, !"cold"}
+!379 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!380 = !{!381, !"cold"}
+!381 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!382 = !{!383, !"cold"}
+!383 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!384 = !{!385, !"cold"}
+!385 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!386 = !{!387, !"cold"}
+!387 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!388 = !{!389, !"cold"}
+!389 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!390 = !{!391, !"cold"}
+!391 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!392 = !{!393, !"cold"}
+!393 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!394 = !{!395, !"cold"}
+!395 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!396 = !{!397, !"cold"}
+!397 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!398 = !{!399, !"cold"}
+!399 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!400 = !{!401, !"cold"}
+!401 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!402 = !{!403, !"cold"}
+!403 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!404 = !{!405, !"cold"}
+!405 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!406 = !{!407, !"cold"}
+!407 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 
-4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148}
+!408 = !{!409, !"cold"}
+!409 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!410 = !{!411, !"cold"}
+!411 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!412 = !{!413, !"cold"}
+!413 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!414 = !{!415, !"cold"}
+!415 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 
-3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148}
+!416 = !{!417, !"cold"}
+!417 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!418 = !{!419, !"cold"}
+!419 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!420 = !{!421, !"cold"}
+!421 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!422 = !{!423, !"cold"}
+!423 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!424 = !{!425, !"cold"}
+!425 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!426 = !{!427, !"cold"}
+!427 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!428 = !{!429, !"cold"}
+!429 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214}
+!430 = !{!431, !"cold"}
+!431 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350}
+!432 = !{!433, !"cold"}
+!433 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 6409510268436951296, i64 3865292116923879613, i64 6531269350744799067}
+!434 = !{!435, !"cold"}
+!435 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 -8512453591954350037}
+!436 = !{!437, !"cold"}
+!437 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -529266326168624863}
+!438 = !{!439, !"cold"}
+!439 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 75847647400915869}
+!440 = !{!441, !"notcold"}
+!441 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 183044117821689400}
+!442 = !{!443, !"cold"}
+!443 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 448612777694345181}
+!444 = !{!445, !"cold"}
+!445 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 885980778253288927}
+!446 = !{!447, !"cold"}
+!447 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1180177230914639611}
+!448 = !{!449, !"cold"}
+!449 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1494295413782701148}
+!450 = !{!451, !"cold"}
+!451 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1659406033211294585}
+!452 = !{!453, !"cold"}
+!453 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1746300563922301711}
+!454 = !{!455, !"cold"}
+!455 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1838331058072364305}
+!456 = !{!457, !"cold"}
+!457 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1955855015757903991}
+!458 = !{!459, !"cold"}
+!459 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2061170524869170355}
+!460 = !{!461, !"cold"}
+!461 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2063820668743361132}
+!462 = !{!463, !"cold"}
+!463 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2165533455721301492}
+!464 = !{!465, !"cold"}
+!465 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2244685673087294926}
+!466 = !{!467, !"cold"}
+!467 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3034209063447837014}
+!468 = !{!469, !"cold"}
+!469 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3036500738208828614}
+!470 = !{!471, !"cold"}
+!471 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3377720129330375308}
+!472 = !{!473, !"cold"}
+!473 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3706881338225322912}
+!474 = !{!475, !"cold"}
+!475 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3782560208401211919}
+!476 = !{!477, !"cold"}
+!477 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3863666838850843735}
+!478 = !{!479, !"cold"}
+!479 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4283301159808000566}
+!480 = !{!481, !"cold"}
+!481 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4384706011107298227}
+!482 = !{!483, !"cold"}
+!483 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4607155140475180580}
+!484 = !{!485, !"cold"}
+!485 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4714747246633738682}
+!486 = !{!487, !"cold"}
+!487 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4723652453916328978}
+!488 = !{!489, !"cold"}
+!489 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4727386874466965794}
+!490 = !{!491, !"cold"}
+!491 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5253938656018120605}
+!492 = !{!493, !"cold"}
+!493 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5514184169544971646}
+!494 = !{!495, !"cold"}
+!495 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5608420749997639494}
+!496 = !{!497, !"cold"}
+!497 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5673887548322881439}
+!498 = !{!499, !"cold"}
+!499 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6431479059631025437}
+!500 = !{!501, !"cold"}
+!501 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6478461706098589874}
+!502 = !{!503, !"cold"}
+!503 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6516605682960517029}
+!504 = !{!505, !"cold"}
+!505 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6573556954419869084}
+!506 = !{!507, !"cold"}
+!507 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6616057203216317315}
+!508 = !{!509, !"cold"}
+!509 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7654798960227041366}
+!510 = !{!511, !"cold"}
+!511 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8203893749632590951}
+!512 = !{!513, !"cold"}
+!513 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8205991499743838669}
+!514 = !{!515, !"cold"}
+!515 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8731033915056759998}
+!516 = !{!517, !"cold"}
+!517 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9004589987493894324}
+!518 = !{!519, !"cold"}
+!519 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9163289110187211963}
+!520 = !{!521, !"cold"}
+!521 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9194683874556492116}
+!522 = !{!523, !"cold"}
+!523 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8575720568240258506}
+!524 = !{!525, !"cold"}
+!525 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8058981938237464077}
+!526 = !{!527, !"cold"}
+!527 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7589260736264366062}
+!528 = !{!529, !"cold"}
+!529 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6900734672072851702}
+!530 = !{!531, !"cold"}
+!531 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6409112906345845960}
+!532 = !{!533, !"cold"}
+!533 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5965768366903796804}
+!534 = !{!535, !"cold"}
+!535 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5597465602586680838}
+!536 = !{!537, !"cold"}
+!537 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5417805719047465045}
+!538 = !{!539, !"cold"}
+!539 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5086634999161749506}
+!540 = !{!541, !"cold"}
+!541 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4597440142749257264}
+!542 = !{!543, !"cold"}
+!543 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4451114404581865651}
+!544 = !{!545, !"cold"}
+!545 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3770505485117883193}
+!546 = !{!547, !"cold"}
+!547 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3615756978728100936}
+!548 = !{!549, !"cold"}
+!549 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3487944172906860150}
+!550 = !{!551, !"cold"}
+!551 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3343380507450452769}
+!552 = !{!553, !"cold"}
+!553 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2513421463471446585}
+!554 = !{!555, !"cold"}
+!555 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2457105650992547275}
+!556 = !{!557, !"cold"}
+!557 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2303844962102880850}
+!558 = !{!559, !"cold"}
+!559 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2234974922194715802}
+!560 = !{!561, !"cold"}
+!561 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2202950085962554431}
+!562 = !{!563, !"cold"}
+!563 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2188352265869670772}
+!564 = !{!565, !"cold"}
+!565 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1875947641213642418}
+!566 = !{!567, !"cold"}
+!567 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1820129202923097948}
+!568 = !{!569, !"cold"}
+!569 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1373019485205162581}
+!570 = !{!571, !"cold"}
+!571 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1152657617489033068}
+!572 = !{!573, !"cold"}
+!573 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1141827363237952836}
+!574 = !{!575, !"cold"}
+!575 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1018440817185542489}
+!576 = !{!577, !"cold"}
+!577 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -864073397647784829}
+!578 = !{!579, !"cold"}
+!579 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -621063222059245646}
+!580 = !{!581, !"cold"}
+!581 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -443019638236000223}
+!582 = !{!583, !"cold"}
+!583 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -365722648089426825}
+!584 = !{!585, !"cold"}
+!585 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -87465032429483344}
+!586 = !{!587, !"cold"}
+!587 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -85418178938284967}
+!588 = !{!589, !"cold"}
+!589 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -1631791060190603686}
+!590 = !{!591, !"cold"}
+!591 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 7655619093809680908}
+!592 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257}
+!593 = !{i64 -2395126913894468764, i64 -5597465602586680838}
+!594 = !{i64 8922170082039419050}
+!595 = !{i64 -627092666823557924}
+!596 = !{!"function_entry_count", i64 152}
+!597 = !{i64 -3914526793362151039, i64 -281623063928291605}
+!598 = !{i64 8550096939708590324, i64 3396817364192390559}
+!599 = !{!600, !602, !604, !606, !608, !610, !612, !614, !616, !618, !620, !622, !624, !626, !628, !630, !632, !634, !636, !638, !640, !642, !644, !646, !648, !650, !652, !654, !656, !658, !660, !662, !664, !666, !668, !670, !672, !674, !676, !678, !680, !682, !684, !686, !688, !690, !692, !694, !696, !698, !700, !702, !704, !706, !708, !710, !712, !714, !716, !718, !720, !722, !724, !726, !728, !730, !732, !734, !736, !738, !740, !742, !744, !746, !748, !750, !752, !754, !756, !758, !760, !762, !764, !766}
+!600 = !{!601, !"cold"}
+!601 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 75847647400915869}
+!602 = !{!603, !"notcold"}
+!603 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 183044117821689400}
+!604 = !{!605, !"cold"}
+!605 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 387225403803835478}
+!606 = !{!607, !"cold"}
+!607 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 794721636207329848}
+!608 = !{!609, !"cold"}
+!609 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 885980778253288927}
+!610 = !{!611, !"cold"}
+!611 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1015792291075825982}
+!612 = !{!613, !"cold"}
+!613 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1111950148150102502}
+!614 = !{!615, !"cold"}
+!615 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1180177230914639611}
+!616 = !{!617, !"cold"}
+!617 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1494295413782701148}
+!618 = !{!619, !"cold"}
+!619 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1659406033211294585}
+!620 = !{!621, !"cold"}
+!621 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1746300563922301711}
+!622 = !{!623, !"cold"}
+!623 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1955855015757903991}
+!624 = !{!625, !"cold"}
+!625 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2061170524869170355}
+!626 = !{!627, !"cold"}
+!627 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2244685673087294926}
+!628 = !{!629, !"cold"}
+!629 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2258506400904444929}
+!630 = !{!631, !"cold"}
+!631 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2780636789936373045}
+!632 = !{!633, !"cold"}
+!633 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3003382690369027000}
+!634 = !{!635, !"cold"}
+!635 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3036500738208828614}
+!636 = !{!637, !"cold"}
+!637 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3377720129330375308}
+!638 = !{!639, !"cold"}
+!639 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3516390910007184229}
+!640 = !{!641, !"cold"}
+!641 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3730017386320691583}
+!642 = !{!643, !"cold"}
+!643 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3863666838850843735}
+!644 = !{!645, !"cold"}
+!645 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3987931074178356766}
+!646 = !{!647, !"cold"}
+!647 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4475965293000579642}
+!648 = !{!649, !"cold"}
+!649 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4727386874466965794}
+!650 = !{!651, !"cold"}
+!651 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5626241009149718631}
+!652 = !{!653, !"cold"}
+!653 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5642831061945076553}
+!654 = !{!655, !"cold"}
+!655 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5803992361678417930}
+!656 = !{!657, !"cold"}
+!657 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6027979419669521677}
+!658 = !{!659, !"cold"}
+!659 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6042081988325512172}
+!660 = !{!661, !"cold"}
+!661 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6431479059631025437}
+!662 = !{!663, !"cold"}
+!663 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6516605682960517029}
+!664 = !{!665, !"cold"}
+!665 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6591116691263167381}
+!666 = !{!667, !"cold"}
+!667 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6616057203216317315}
+!668 = !{!669, !"cold"}
+!669 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7353174428633641643}
+!670 = !{!671, !"cold"}
+!671 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7359049424616934025}
+!672 = !{!673, !"cold"}
+!673 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7654798960227041366}
+!674 = !{!675, !"cold"}
+!675 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7683835084651596728}
+!676 = !{!677, !"cold"}
+!677 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8205991499743838669}
+!678 = !{!679, !"cold"}
+!679 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8238108917955982391}
+!680 = !{!681, !"cold"}
+!681 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8837495783078234090}
+!682 = !{!683, !"cold"}
+!683 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9194683874556492116}
+!684 = !{!685, !"cold"}
+!685 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8803530003105057334}
+!686 = !{!687, !"cold"}
+!687 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8442443030798361395}
+!688 = !{!689, !"cold"}
+!689 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8058981938237464077}
+!690 = !{!691, !"cold"}
+!691 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7589260736264366062}
+!692 = !{!693, !"cold"}
+!693 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7386139261276737944}
+!694 = !{!695, !"cold"}
+!695 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7185537622496035457}
+!696 = !{!697, !"cold"}
+!697 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6900734672072851702}
+!698 = !{!699, !"cold"}
+!699 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6762801909492469455}
+!700 = !{!701, !"cold"}
+!701 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6724742556843950685}
+!702 = !{!703, !"cold"}
+!703 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6409112906345845960}
+!704 = !{!705, !"cold"}
+!705 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5965768366903796804}
+!706 = !{!707, !"cold"}
+!707 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5850556812626124399}
+!708 = !{!709, !"cold"}
+!709 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5753111555624672216}
+!710 = !{!711, !"cold"}
+!711 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5726116740966450362}
+!712 = !{!713, !"cold"}
+!713 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5725103543964193355}
+!714 = !{!715, !"cold"}
+!715 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5597465602586680838}
+!716 = !{!717, !"cold"}
+!717 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5542310814437003522}
+!718 = !{!719, !"cold"}
+!719 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5086634999161749506}
+!720 = !{!721, !"cold"}
+!721 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4892330537265564330}
+!722 = !{!723, !"cold"}
+!723 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4597440142749257264}
+!724 = !{!725, !"cold"}
+!725 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3770505485117883193}
+!726 = !{!727, !"cold"}
+!727 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3715299750555754568}
+!728 = !{!729, !"cold"}
+!729 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3615756978728100936}
+!730 = !{!731, !"cold"}
+!731 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3487944172906860150}
+!732 = !{!733, !"cold"}
+!733 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3343380507450452769}
+!734 = !{!735, !"cold"}
+!735 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3290831674468055960}
+!736 = !{!737, !"cold"}
+!737 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3257889137998070481}
+!738 = !{!739, !"cold"}
+!739 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2808938421088748093}
+!740 = !{!741, !"cold"}
+!741 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2656685852259003985}
+!742 = !{!743, !"cold"}
+!743 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2513421463471446585}
+!744 = !{!745, !"cold"}
+!745 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2346373089490877370}
+!746 = !{!747, !"cold"}
+!747 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2188352265869670772}
+!748 = !{!749, !"cold"}
+!749 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1875947641213642418}
+!750 = !{!751, !"cold"}
+!751 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1820129202923097948}
+!752 = !{!753, !"cold"}
+!753 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1152657617489033068}
+!754 = !{!755, !"cold"}
+!755 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1141827363237952836}
+!756 = !{!757, !"cold"}
+!757 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1045863445022014911}
+!758 = !{!759, !"cold"}
+!759 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -443019638236000223}
+!760 = !{!761, !"cold"}
+!761 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -365722648089426825}
+!762 = !{!763, !"cold"}
+!763 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -290581884459589510}
+!764 = !{!765, !"cold"}
+!765 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -128397300321256467}
+!766 = !{!767, !"cold"}
+!767 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -1631791060190603686}
+!768 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908}
+!769 = !{i64 -2395126913894468764, i64 2258506400904444929}
+!770 = !{i64 8287424466766266086}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll
index 990a4a4..151ebf3 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll
@@ -4,6 +4,9 @@
;; The code is similar to that of basic.ll, but with a second allocation.
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+;; Disable merge iteration for now as it causes spurious diffs due to different
+;; iteration order (but the same ultimate hinting of the contexts).
+; RUN: -memprof-merge-iteration=false \
; RUN: -memprof-verify-ccg -memprof-dump-ccg %s -S 2>&1 | FileCheck %s \
; RUN: --check-prefix=IR --check-prefix=DUMP
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll
index 80f6bc7..5968c3f 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll
@@ -6,6 +6,9 @@
;; share the merged nodes when possible.
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+;; Disable merge iteration for now as it causes spurious diffs due to different
+;; iteration order (but the same ultimate hinting of the contexts).
+; RUN: -memprof-merge-iteration=false \
; RUN: -memprof-verify-ccg -memprof-dump-ccg %s -S 2>&1 | FileCheck %s \
; RUN: --check-prefix=IR --check-prefix=DUMP
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
new file mode 100644
index 0000000..bb6f3e7
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
@@ -0,0 +1,1075 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -mcpu=neoverse-v2 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
+
+;; same_op2: for (i = 0; i < 1152; i += 2) for (j = 0; j < 2; j++)
+;;   a[i+j] += c[i+j] * b[i+j]   (float, fast-math fmul/fadd)
+;; The input is in unoptimized alloca/load/store form. The assertions below
+;; expect a vector loop made of plain <8 x float> loads, fmul, fadd and stores
+;; (two such groups per iteration) with no shufflevector instructions.
+define void @same_op2(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op2(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[WIDE_VEC15:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC18:%.*]] = load <8 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[WIDE_VEC21:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[WIDE_VEC27:%.*]] = load <8 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <8 x float> [[WIDE_VEC18]], [[WIDE_VEC]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <8 x float> [[WIDE_VEC24]], [[TMP7]]
+; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <8 x float> [[WIDE_VEC21]], [[WIDE_VEC15]]
+; CHECK-NEXT: [[INTERLEAVED_VEC30:%.*]] = fadd fast <8 x float> [[WIDE_VEC27]], [[TMP8]]
+; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC30]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 576
+; CHECK-NEXT: br i1 [[TMP9]], label %[[FOR_END13:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[FOR_END13]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 2, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc11, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end13
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 2.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 2
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[i+j] * b[i+j]; the index i+j is recomputed per array.
+ %2 = load ptr, ptr %c.addr, align 8
+ %3 = load i32, ptr %i, align 4
+ %4 = load i32, ptr %j, align 4
+ %add = add nsw i32 %3, %4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
+ %5 = load float, ptr %arrayidx, align 4
+ %6 = load ptr, ptr %b.addr, align 8
+ %7 = load i32, ptr %i, align 4
+ %8 = load i32, ptr %j, align 4
+ %add4 = add nsw i32 %7, %8
+ %idxprom5 = sext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds float, ptr %6, i64 %idxprom5
+ %9 = load float, ptr %arrayidx6, align 4
+ %mul = fmul fast float %5, %9
+ %10 = load ptr, ptr %a.addr, align 8
+ %11 = load i32, ptr %i, align 4
+ %12 = load i32, ptr %j, align 4
+ %add7 = add nsw i32 %11, %12
+ %idxprom8 = sext i32 %add7 to i64
+ %arrayidx9 = getelementptr inbounds float, ptr %10, i64 %idxprom8
+ %13 = load float, ptr %arrayidx9, align 4
+ %add10 = fadd fast float %13, %mul
+ store float %add10, ptr %arrayidx9, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %14 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %14, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc11
+
+for.inc11: ; preds = %for.end
+ ; Outer step: %i += 2.
+ %15 = load i32, ptr %i, align 4
+ %add12 = add nsw i32 %15, 2
+ store i32 %add12, ptr %i, align 4
+ br label %for.cond
+
+for.end13: ; preds = %for.cond
+ ret void
+}
+
+
+;; same_op2_splat: for (i = 0; i < 1152; i += 2) for (j = 0; j < 2; j++)
+;;   a[i+j] += c[0] * b[i+j]   (float, fast-math fmul/fadd)
+;; Same shape as same_op2, but the multiplier is always element 0 of %c.
+;; The assertions below expect c[0] to be loaded once in the entry block and
+;; splatted to <8 x float>, with the loop body using plain <8 x float> loads,
+;; fmul, fadd and stores.
+define void @same_op2_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op2_splat(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP3]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[WIDE_VEC13:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[WIDE_VEC16:%.*]] = load <8 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <8 x float>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <8 x float> [[WIDE_VEC]], [[TMP1]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <8 x float> [[WIDE_VEC16]], [[TMP8]]
+; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <8 x float> [[WIDE_VEC13]], [[TMP2]]
+; CHECK-NEXT: [[INTERLEAVED_VEC22:%.*]] = fadd fast <8 x float> [[WIDE_VEC19]], [[TMP9]]
+; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC22]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 576
+; CHECK-NEXT: br i1 [[TMP10]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END11]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 2, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc9, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end11
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 2.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 2
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[0] * b[i+j]; c[0] is reloaded on every iteration here.
+ %2 = load ptr, ptr %c.addr, align 8
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 0
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %b.addr, align 8
+ %5 = load i32, ptr %i, align 4
+ %6 = load i32, ptr %j, align 4
+ %add = add nsw i32 %5, %6
+ %idxprom = sext i32 %add to i64
+ %arrayidx4 = getelementptr inbounds float, ptr %4, i64 %idxprom
+ %7 = load float, ptr %arrayidx4, align 4
+ %mul = fmul fast float %3, %7
+ %8 = load ptr, ptr %a.addr, align 8
+ %9 = load i32, ptr %i, align 4
+ %10 = load i32, ptr %j, align 4
+ %add5 = add nsw i32 %9, %10
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds float, ptr %8, i64 %idxprom6
+ %11 = load float, ptr %arrayidx7, align 4
+ %add8 = fadd fast float %11, %mul
+ store float %add8, ptr %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %12 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc9
+
+for.inc9: ; preds = %for.end
+ ; Outer step: %i += 2.
+ %13 = load i32, ptr %i, align 4
+ %add10 = add nsw i32 %13, 2
+ store i32 %add10, ptr %i, align 4
+ br label %for.cond
+
+for.end11: ; preds = %for.cond
+ ret void
+}
+
+
+;; same_op3: for (i = 0; i < 1152; i += 3) for (j = 0; j < 3; j++)
+;;   a[i+j] += c[i+j] * b[i+j]   (float, fast-math fmul/fadd)
+;; Variant of same_op2 with an inner trip count and outer step of 3.
+;; The assertions below expect a vector loop of plain <12 x float> loads,
+;; fmul, fadd and a store, with no shufflevector instructions.
+define void @same_op3(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op3(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC16:%.*]] = load <12 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC20:%.*]] = load <12 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <12 x float> [[WIDE_VEC16]], [[WIDE_VEC]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <12 x float> [[WIDE_VEC20]], [[TMP3]]
+; CHECK-NEXT: store <12 x float> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 384
+; CHECK-NEXT: br i1 [[TMP4]], label %[[FOR_END13:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[FOR_END13]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 3, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc11, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end13
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 3.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 3
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[i+j] * b[i+j]; the index i+j is recomputed per array.
+ %2 = load ptr, ptr %c.addr, align 8
+ %3 = load i32, ptr %i, align 4
+ %4 = load i32, ptr %j, align 4
+ %add = add nsw i32 %3, %4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
+ %5 = load float, ptr %arrayidx, align 4
+ %6 = load ptr, ptr %b.addr, align 8
+ %7 = load i32, ptr %i, align 4
+ %8 = load i32, ptr %j, align 4
+ %add4 = add nsw i32 %7, %8
+ %idxprom5 = sext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds float, ptr %6, i64 %idxprom5
+ %9 = load float, ptr %arrayidx6, align 4
+ %mul = fmul fast float %5, %9
+ %10 = load ptr, ptr %a.addr, align 8
+ %11 = load i32, ptr %i, align 4
+ %12 = load i32, ptr %j, align 4
+ %add7 = add nsw i32 %11, %12
+ %idxprom8 = sext i32 %add7 to i64
+ %arrayidx9 = getelementptr inbounds float, ptr %10, i64 %idxprom8
+ %13 = load float, ptr %arrayidx9, align 4
+ %add10 = fadd fast float %13, %mul
+ store float %add10, ptr %arrayidx9, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %14 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %14, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc11
+
+for.inc11: ; preds = %for.end
+ ; Outer step: %i += 3.
+ %15 = load i32, ptr %i, align 4
+ %add12 = add nsw i32 %15, 3
+ store i32 %add12, ptr %i, align 4
+ br label %for.cond
+
+for.end13: ; preds = %for.cond
+ ret void
+}
+
+
+;; same_op3_splat: for (i = 0; i < 1152; i += 3) for (j = 0; j < 3; j++)
+;;   a[i+j] += c[0] * b[i+j]   (float, fast-math fmul/fadd)
+;; Variant of same_op3 where the multiplier is always element 0 of %c.
+;; Unlike the other same_op* functions shown here, the assertions below still
+;; contain strided shufflevector extracts of the <12 x float> loads and a
+;; re-interleaving shufflevector before the store.
+define void @same_op3_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op3_splat(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC12:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC13:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC14:%.*]] = load <12 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[STRIDED_VEC15]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[STRIDED_VEC12]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[STRIDED_VEC16]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[STRIDED_VEC13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <12 x float> [[WIDE_VEC14]], <12 x float> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <8 x float> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> [[TMP11]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+; CHECK-NEXT: store <12 x float> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 384
+; CHECK-NEXT: br i1 [[TMP12]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[FOR_END11]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 3, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc9, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end11
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 3.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 3
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[0] * b[i+j]; c[0] is reloaded on every iteration here.
+ %2 = load ptr, ptr %c.addr, align 8
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 0
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %b.addr, align 8
+ %5 = load i32, ptr %i, align 4
+ %6 = load i32, ptr %j, align 4
+ %add = add nsw i32 %5, %6
+ %idxprom = sext i32 %add to i64
+ %arrayidx4 = getelementptr inbounds float, ptr %4, i64 %idxprom
+ %7 = load float, ptr %arrayidx4, align 4
+ %mul = fmul fast float %3, %7
+ %8 = load ptr, ptr %a.addr, align 8
+ %9 = load i32, ptr %i, align 4
+ %10 = load i32, ptr %j, align 4
+ %add5 = add nsw i32 %9, %10
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds float, ptr %8, i64 %idxprom6
+ %11 = load float, ptr %arrayidx7, align 4
+ %add8 = fadd fast float %11, %mul
+ store float %add8, ptr %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %12 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc9
+
+for.inc9: ; preds = %for.end
+ ; Outer step: %i += 3.
+ %13 = load i32, ptr %i, align 4
+ %add10 = add nsw i32 %13, 3
+ store i32 %add10, ptr %i, align 4
+ br label %for.cond
+
+for.end11: ; preds = %for.cond
+ ret void
+}
+
+
+;; same_op4: for (i = 0; i < 1152; i += 4) for (j = 0; j < 4; j++)
+;;   a[i+j] += c[i+j] * b[i+j]   (float, fast-math fmul/fadd)
+;; Variant of same_op2 with an inner trip count and outer step of 4.
+;; The assertions below expect a vector loop of plain <16 x float> loads,
+;; fmul, fadd and a store, with no shufflevector instructions.
+define void @same_op4(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op4(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC17:%.*]] = load <16 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC22:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <16 x float> [[WIDE_VEC17]], [[WIDE_VEC]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC22]], [[TMP3]]
+; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 288
+; CHECK-NEXT: br i1 [[TMP4]], label %[[FOR_END13:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[FOR_END13]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 4, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc11, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end13
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 4.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 4
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[i+j] * b[i+j]; the index i+j is recomputed per array.
+ %2 = load ptr, ptr %c.addr, align 8
+ %3 = load i32, ptr %i, align 4
+ %4 = load i32, ptr %j, align 4
+ %add = add nsw i32 %3, %4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
+ %5 = load float, ptr %arrayidx, align 4
+ %6 = load ptr, ptr %b.addr, align 8
+ %7 = load i32, ptr %i, align 4
+ %8 = load i32, ptr %j, align 4
+ %add4 = add nsw i32 %7, %8
+ %idxprom5 = sext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds float, ptr %6, i64 %idxprom5
+ %9 = load float, ptr %arrayidx6, align 4
+ %mul = fmul fast float %5, %9
+ %10 = load ptr, ptr %a.addr, align 8
+ %11 = load i32, ptr %i, align 4
+ %12 = load i32, ptr %j, align 4
+ %add7 = add nsw i32 %11, %12
+ %idxprom8 = sext i32 %add7 to i64
+ %arrayidx9 = getelementptr inbounds float, ptr %10, i64 %idxprom8
+ %13 = load float, ptr %arrayidx9, align 4
+ %add10 = fadd fast float %13, %mul
+ store float %add10, ptr %arrayidx9, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %14 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %14, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc11
+
+for.inc11: ; preds = %for.end
+ ; Outer step: %i += 4.
+ %15 = load i32, ptr %i, align 4
+ %add12 = add nsw i32 %15, 4
+ store i32 %add12, ptr %i, align 4
+ br label %for.cond
+
+for.end13: ; preds = %for.cond
+ ret void
+}
+
+
+;; same_op4_splat: for (i = 0; i < 1152; i += 4) for (j = 0; j < 4; j++)
+;;   a[i+j] += c[0] * b[i+j]   (float, fast-math fmul/fadd)
+;; Variant of same_op4 where the multiplier is always element 0 of %c.
+;; The assertions below expect c[0] to be loaded once in the entry block and
+;; splatted to <16 x float>, with the loop body using plain <16 x float>
+;; loads, fmul, fadd and a store.
+define void @same_op4_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op4_splat(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC15:%.*]] = load <16 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC15]], [[TMP4]]
+; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 288
+; CHECK-NEXT: br i1 [[TMP5]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_END11]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 4, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc9, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end11
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 4.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 4
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[0] * b[i+j]; c[0] is reloaded on every iteration here.
+ %2 = load ptr, ptr %c.addr, align 8
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 0
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %b.addr, align 8
+ %5 = load i32, ptr %i, align 4
+ %6 = load i32, ptr %j, align 4
+ %add = add nsw i32 %5, %6
+ %idxprom = sext i32 %add to i64
+ %arrayidx4 = getelementptr inbounds float, ptr %4, i64 %idxprom
+ %7 = load float, ptr %arrayidx4, align 4
+ %mul = fmul fast float %3, %7
+ %8 = load ptr, ptr %a.addr, align 8
+ %9 = load i32, ptr %i, align 4
+ %10 = load i32, ptr %j, align 4
+ %add5 = add nsw i32 %9, %10
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds float, ptr %8, i64 %idxprom6
+ %11 = load float, ptr %arrayidx7, align 4
+ %add8 = fadd fast float %11, %mul
+ store float %add8, ptr %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %12 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc9
+
+for.inc9: ; preds = %for.end
+ ; Outer step: %i += 4.
+ %13 = load i32, ptr %i, align 4
+ %add10 = add nsw i32 %13, 4
+ store i32 %add10, ptr %i, align 4
+ br label %for.cond
+
+for.end11: ; preds = %for.cond
+ ret void
+}
+
+
+;; same_op6: for (i = 0; i < 1152; i += 6) for (j = 0; j < 6; j++)
+;;   a[i+j] += c[i+j] * b[i+j]   (float, fast-math fmul/fadd)
+;; Variant of same_op2 with an inner trip count and outer step of 6.
+;; The assertions below do not expect a vector.body loop here: the expected
+;; output is a single loop block that steps its induction variable by 6 and
+;; handles each group of six floats as one <4 x float> operation plus one
+;; <2 x float> operation at offset +4.
+define void @same_op6(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op6(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; CHECK: [[FOR_COND1_PREHEADER]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[TMP5]]
+; CHECK-NEXT: [[ARRAYIDX6_4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT: [[ARRAYIDX9_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX6_4]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX9_4]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x float> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[ARRAYIDX9_4]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 1146
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_END13:.*]]
+; CHECK: [[FOR_END13]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ ; %N is initialised here but never read again in this function.
+ store i32 6, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc11, %entry
+ ; Outer loop over %i: continues while %i < 1152.
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end13
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ ; Inner loop over %j: continues while %j < 6.
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 6
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ ; a[i+j] = a[i+j] + c[i+j] * b[i+j]; the index i+j is recomputed per array.
+ %2 = load ptr, ptr %c.addr, align 8
+ %3 = load i32, ptr %i, align 4
+ %4 = load i32, ptr %j, align 4
+ %add = add nsw i32 %3, %4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
+ %5 = load float, ptr %arrayidx, align 4
+ %6 = load ptr, ptr %b.addr, align 8
+ %7 = load i32, ptr %i, align 4
+ %8 = load i32, ptr %j, align 4
+ %add4 = add nsw i32 %7, %8
+ %idxprom5 = sext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds float, ptr %6, i64 %idxprom5
+ %9 = load float, ptr %arrayidx6, align 4
+ %mul = fmul fast float %5, %9
+ %10 = load ptr, ptr %a.addr, align 8
+ %11 = load i32, ptr %i, align 4
+ %12 = load i32, ptr %j, align 4
+ %add7 = add nsw i32 %11, %12
+ %idxprom8 = sext i32 %add7 to i64
+ %arrayidx9 = getelementptr inbounds float, ptr %10, i64 %idxprom8
+ %13 = load float, ptr %arrayidx9, align 4
+ %add10 = fadd fast float %13, %mul
+ store float %add10, ptr %arrayidx9, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %14 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %14, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc11
+
+for.inc11: ; preds = %for.end
+ ; Outer step: %i += 6.
+ %15 = load i32, ptr %i, align 4
+ %add12 = add nsw i32 %15, 6
+ store i32 %add12, ptr %i, align 4
+ br label %for.cond
+
+for.end13: ; preds = %for.cond
+ ret void
+}
+
+
+define void @same_op6_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op6_splat(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; CHECK: [[FOR_COND1_PREHEADER]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[ARRAYIDX4_4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP9]]
+; CHECK-NEXT: [[ARRAYIDX7_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX4_4]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x float> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = load <2 x float>, ptr [[ARRAYIDX7_4]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <2 x float> [[TMP12]], [[TMP11]]
+; CHECK-NEXT: store <2 x float> [[TMP13]], ptr [[ARRAYIDX7_4]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 1146
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_END11:.*]]
+; CHECK: [[FOR_END11]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ store i32 6, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc9, %entry
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end11
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 6
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %2 = load ptr, ptr %c.addr, align 8
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 0
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %b.addr, align 8
+ %5 = load i32, ptr %i, align 4
+ %6 = load i32, ptr %j, align 4
+ %add = add nsw i32 %5, %6
+ %idxprom = sext i32 %add to i64
+ %arrayidx4 = getelementptr inbounds float, ptr %4, i64 %idxprom
+ %7 = load float, ptr %arrayidx4, align 4
+ %mul = fmul fast float %3, %7
+ %8 = load ptr, ptr %a.addr, align 8
+ %9 = load i32, ptr %i, align 4
+ %10 = load i32, ptr %j, align 4
+ %add5 = add nsw i32 %9, %10
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds float, ptr %8, i64 %idxprom6
+ %11 = load float, ptr %arrayidx7, align 4
+ %add8 = fadd fast float %11, %mul
+ store float %add8, ptr %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %12 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc9
+
+for.inc9: ; preds = %for.end
+ %13 = load i32, ptr %i, align 4
+ %add10 = add nsw i32 %13, 6
+ store i32 %add10, ptr %i, align 4
+ br label %for.cond
+
+for.end11: ; preds = %for.cond
+ ret void
+}
+
+
+define void @same_op8(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op8(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND1_PREHEADER:.*]]
+; CHECK: [[FOR_COND1_PREHEADER]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[TMP3]], [[TMP2]]
+; CHECK-NEXT: store <4 x float> [[TMP4]], ptr [[ARRAYIDX9]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[TMP5]]
+; CHECK-NEXT: [[ARRAYIDX6_4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT: [[ARRAYIDX9_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX6_4]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX9_4]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[ARRAYIDX9_4]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 1144
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_COND1_PREHEADER]], label %[[FOR_END13:.*]]
+; CHECK: [[FOR_END13]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ store i32 8, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc11, %entry
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end13
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 8
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %2 = load ptr, ptr %c.addr, align 8
+ %3 = load i32, ptr %i, align 4
+ %4 = load i32, ptr %j, align 4
+ %add = add nsw i32 %3, %4
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
+ %5 = load float, ptr %arrayidx, align 4
+ %6 = load ptr, ptr %b.addr, align 8
+ %7 = load i32, ptr %i, align 4
+ %8 = load i32, ptr %j, align 4
+ %add4 = add nsw i32 %7, %8
+ %idxprom5 = sext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds float, ptr %6, i64 %idxprom5
+ %9 = load float, ptr %arrayidx6, align 4
+ %mul = fmul fast float %5, %9
+ %10 = load ptr, ptr %a.addr, align 8
+ %11 = load i32, ptr %i, align 4
+ %12 = load i32, ptr %j, align 4
+ %add7 = add nsw i32 %11, %12
+ %idxprom8 = sext i32 %add7 to i64
+ %arrayidx9 = getelementptr inbounds float, ptr %10, i64 %idxprom8
+ %13 = load float, ptr %arrayidx9, align 4
+ %add10 = fadd fast float %13, %mul
+ store float %add10, ptr %arrayidx9, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %14 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %14, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc11
+
+for.inc11: ; preds = %for.end
+ %15 = load i32, ptr %i, align 4
+ %add12 = add nsw i32 %15, 8
+ store i32 %add12, ptr %i, align 4
+ br label %for.cond
+
+for.end13: ; preds = %for.cond
+ ret void
+}
+
+
+define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef %c) {
+; CHECK-LABEL: define void @same_op8_splat(
+; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC12:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC13:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
+; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
+; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
+; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
+; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC21:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC23:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
+; CHECK-NEXT: [[STRIDED_VEC24:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
+; CHECK-NEXT: [[STRIDED_VEC25:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
+; CHECK-NEXT: [[STRIDED_VEC26:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
+; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[STRIDED_VEC20]], <2 x float> [[STRIDED_VEC21]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[STRIDED_VEC]], <2 x float> [[STRIDED_VEC12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[TMP8]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[STRIDED_VEC22]], <2 x float> [[STRIDED_VEC23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[STRIDED_VEC13]], <2 x float> [[STRIDED_VEC14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[STRIDED_VEC24]], <2 x float> [[STRIDED_VEC25]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[STRIDED_VEC15]], <2 x float> [[STRIDED_VEC16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP17:%.*]] = fmul fast <4 x float> [[TMP16]], [[TMP3]]
+; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[STRIDED_VEC26]], <2 x float> [[STRIDED_VEC27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[STRIDED_VEC17]], <2 x float> [[STRIDED_VEC18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <4 x float> [[TMP20]], [[TMP4]]
+; CHECK-NEXT: [[TMP22:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP23]], <8 x float> [[TMP24]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144
+; CHECK-NEXT: br i1 [[TMP25]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[FOR_END11]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.addr = alloca ptr, align 8
+ %b.addr = alloca ptr, align 8
+ %c.addr = alloca ptr, align 8
+ %N = alloca i32, align 4
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ store ptr %a, ptr %a.addr, align 8
+ store ptr %b, ptr %b.addr, align 8
+ store ptr %c, ptr %c.addr, align 8
+ store i32 8, ptr %N, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc9, %entry
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 1152
+ br i1 %cmp, label %for.body, label %for.end11
+
+for.body: ; preds = %for.cond
+ store i32 0, ptr %j, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.inc, %for.body
+ %1 = load i32, ptr %j, align 4
+ %cmp2 = icmp slt i32 %1, 8
+ br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3: ; preds = %for.cond1
+ %2 = load ptr, ptr %c.addr, align 8
+ %arrayidx = getelementptr inbounds float, ptr %2, i64 0
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = load ptr, ptr %b.addr, align 8
+ %5 = load i32, ptr %i, align 4
+ %6 = load i32, ptr %j, align 4
+ %add = add nsw i32 %5, %6
+ %idxprom = sext i32 %add to i64
+ %arrayidx4 = getelementptr inbounds float, ptr %4, i64 %idxprom
+ %7 = load float, ptr %arrayidx4, align 4
+ %mul = fmul fast float %3, %7
+ %8 = load ptr, ptr %a.addr, align 8
+ %9 = load i32, ptr %i, align 4
+ %10 = load i32, ptr %j, align 4
+ %add5 = add nsw i32 %9, %10
+ %idxprom6 = sext i32 %add5 to i64
+ %arrayidx7 = getelementptr inbounds float, ptr %8, i64 %idxprom6
+ %11 = load float, ptr %arrayidx7, align 4
+ %add8 = fadd fast float %11, %mul
+ store float %add8, ptr %arrayidx7, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body3
+ %12 = load i32, ptr %j, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, ptr %j, align 4
+ br label %for.cond1
+
+for.end: ; preds = %for.cond1
+ br label %for.inc9
+
+for.inc9: ; preds = %for.end
+ %13 = load i32, ptr %i, align 4
+ %add10 = add nsw i32 %13, 8
+ store i32 %add10, ptr %i, align 4
+ br label %for.cond
+
+for.end11: ; preds = %for.cond
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
index c98e7d3..482907d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -8,12 +8,12 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ptr noalias noundef readonly captures(none) [[TMP0:%.*]], ptr noalias noundef writeonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: vector.ph:
-; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
; CHECK-NEXT: br label [[TMP4:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[TMP3]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -28
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5)
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
index d313164..1a18526 100644
--- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=reassociate -S | FileCheck %s --check-prefixes=CHECK,CHECK-CV
+; RUN: opt < %s -passes=reassociate -S -use-constant-int-for-fixed-length-splat=false | FileCheck %s --check-prefixes=CHECK,CHECK-CV
; RUN: opt < %s -passes=reassociate -S -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=CHECK,CHECK-CI
; Check that a*c+b*c is turned into (a+b)*c
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
index b309682..2fad306c5 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll
@@ -47,10 +47,8 @@ define void @testDisjointOrSplits(ptr %p) {
; CHECK-LABEL: define void @testDisjointOrSplits(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
-; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
-; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[VAR]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 10
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
index b0e88ef..a6b38bc 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll
@@ -31,11 +31,9 @@ define void @test_A_sub_B_add_ConstantInt(ptr %p) {
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64
; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[SUB22]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 2044
-; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2044
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP4]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[TMP7]], align 4
; CHECK-NEXT: br label [[COND_END]]
; CHECK: cond.end:
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 143cc38..8c2d1a4 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -18,7 +18,17 @@ from lit.llvm.subst import ToolSubst
config.name = "LLVM"
# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+extra_substitutions = extra_substitutions = (
+ [
+ (r"\| not FileCheck .*", "> /dev/null"),
+ (r"\| FileCheck .*", "> /dev/null"),
+ ]
+ if config.enable_profcheck
+ else []
+)
+config.test_format = lit.formats.ShTest(
+ not llvm_config.use_lit_shell, extra_substitutions
+)
# suffixes: A list of file extensions to treat as test files. This is overriden
# by individual lit.local.cfg files in the test subdirectories.
@@ -107,7 +117,12 @@ lli_args = []
# we don't support COFF in MCJIT well enough for the tests, force ELF format on
# Windows. FIXME: the process target triple should be used here, but this is
# difficult to obtain on Windows.
-if re.search(r"cygwin|windows-gnu|windows-msvc", config.host_triple):
+# Cygwin is excluded from this workaround, even though it is COFF, because this
+# breaks remote tests due to not having a __register_frame function. The only
+# test that succeeds with cygwin-elf but fails with cygwin is
+# test/ExecutionEngine/MCJIT/stubs-sm-pic.ll so this test is marked as XFAIL
+# for cygwin targets.
+if re.search(r"windows-gnu|windows-msvc", config.host_triple):
lli_args = ["-mtriple=" + config.host_triple + "-elf"]
llc_args = []
@@ -278,6 +293,7 @@ tools.extend(
]
)
+
# Find (major, minor) version of ptxas
def ptxas_version(ptxas):
ptxas_cmd = subprocess.Popen([ptxas, "--version"], stdout=subprocess.PIPE)
@@ -385,10 +401,11 @@ if config.target_triple:
else:
config.available_features.add("target-byteorder-little-endian")
-if sys.platform in ["win32"]:
+if sys.platform in ["win32", "cygwin"]:
# ExecutionEngine, no weak symbols in COFF.
config.available_features.add("uses_COFF")
-else:
+
+if sys.platform not in ["win32"]:
# Others/can-execute.txt
config.available_features.add("can-execute")
@@ -451,7 +468,7 @@ if config.link_llvm_dylib:
"%llvmdylib",
"{}/libLLVM{}.{}".format(
config.llvm_shlib_dir, config.llvm_shlib_ext, config.llvm_dylib_version
- )
+ ),
)
)
@@ -582,6 +599,7 @@ def have_ld64_plugin_support():
if have_ld64_plugin_support():
config.available_features.add("ld64_plugin")
+
def host_unwind_supports_jit():
# Do we expect the host machine to support JIT registration of clang's
# default unwind info format for the host (e.g. eh-frames, compact-unwind,
@@ -589,7 +607,7 @@ def host_unwind_supports_jit():
# Linux and the BSDs use DWARF eh-frames and all known unwinders support
# register_frame at minimum.
- if platform.system() in [ "Linux", "FreeBSD", "NetBSD" ]:
+ if platform.system() in ["Linux", "FreeBSD", "NetBSD"]:
return True
# Windows does not support frame info without the ORC runtime.
@@ -601,11 +619,7 @@ def host_unwind_supports_jit():
# compact-unwind only, and JIT'd registration is not available before
# macOS 14.0.
if platform.system() == "Darwin":
-
- assert (
- "arm64" in config.host_triple
- or "x86_64" in config.host_triple
- )
+ assert "arm64" in config.host_triple or "x86_64" in config.host_triple
if "x86_64" in config.host_triple:
return True
@@ -627,6 +641,7 @@ def host_unwind_supports_jit():
return False
+
if host_unwind_supports_jit():
config.available_features.add("host-unwind-supports-jit")
@@ -659,7 +674,7 @@ if not hasattr(sys, "getwindowsversion") or sys.getwindowsversion().build >= 170
# .debug_frame is not emitted for targeting Windows x64, aarch64/arm64, AIX, or Apple Silicon Mac.
if not re.match(
- r"^(x86_64|aarch64|arm64|powerpc|powerpc64).*-(windows-gnu|windows-msvc|aix)",
+ r"^(x86_64|aarch64|arm64|powerpc|powerpc64).*-(windows-cygnus|windows-gnu|windows-msvc|aix)",
config.target_triple,
) and not re.match(r"^arm64(e)?-apple-(macos|darwin)", config.target_triple):
config.available_features.add("debug_frame")
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test
index 6368451..bd1c57d 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test
@@ -3,6 +3,7 @@
# Replace llc with cat, so we can simulate hypothetical output without actually running llc.
# Copy the simulated output to the temporary directory.
-# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments-O0.s %T/ && cp -f %S/Inputs/amdgpu_no_merge_comments-O3.s %T/
-# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments.ll %t.ll && %update_llc_test_checks --llc-binary cat %t.ll
-# RUN: diff -u %S/Inputs/amdgpu_no_merge_comments.ll.expected %t.ll
+# RUN: mkdir -p %t.dir
+# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments-O0.s %t.dir/ && cp -f %S/Inputs/amdgpu_no_merge_comments-O3.s %t.dir/
+# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments.ll %t.dir/file.ll && %update_llc_test_checks --llc-binary cat %t.dir/file.ll
+# RUN: diff -u %S/Inputs/amdgpu_no_merge_comments.ll.expected %t.dir/file.ll
diff --git a/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test b/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test
index bc59414..70a263b 100644
--- a/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test
+++ b/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test
@@ -1,12 +1,13 @@
-RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL
-RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL
-RUN: llvm-readobj --sections --section-data %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ
-RUN: llvm-dwarfdump --show-section-sizes %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP
+RUN: rm -rf %t && mkdir %t
+RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL
+RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL
+RUN: llvm-readobj --sections --section-data %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ
+RUN: llvm-dwarfdump --show-section-sizes %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP
-RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL
-RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL
-RUN: llvm-readobj --sections --section-data %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ
-RUN: llvm-dwarfdump --show-section-sizes %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP
+RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL
+RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL
+RUN: llvm-readobj --sections --section-data %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ
+RUN: llvm-dwarfdump --show-section-sizes %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP
The tested object file has been created by the dummy Swift code:
@@ -27,5 +28,5 @@ READOBJ-NEXT: |.|
DWARFDUMP: __swift_ast
-RUN: dsymutil -s %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=NAST
+RUN: dsymutil -s %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=NAST
NAST-NOT: N_AST
diff --git a/llvm/test/tools/llvm-dwarfdump/dump_dwo.test b/llvm/test/tools/llvm-dwarfdump/dump_dwo.test
index 7405955..c5f22a2 100644
--- a/llvm/test/tools/llvm-dwarfdump/dump_dwo.test
+++ b/llvm/test/tools/llvm-dwarfdump/dump_dwo.test
@@ -1,9 +1,10 @@
## Check that llvm-dwarfdump works when dumping .o files with .dwo files.
-# RUN: yaml2obj %p/Inputs/dump_dwo.o.yaml -o %T/dump_dwo.o
-# RUN: yaml2obj %p/Inputs/dump_dwo.dwo.yaml -o %T/dump_dwo.dwo
+# RUN: mkdir -p %t.dir
+# RUN: yaml2obj %p/Inputs/dump_dwo.o.yaml -o %t.dir/dump_dwo.o
+# RUN: yaml2obj %p/Inputs/dump_dwo.dwo.yaml -o %t.dir/dump_dwo.dwo
## We need to be in the same directory so we can find .dwo file
-# RUN: cd %T
+# RUN: cd %t.dir
# RUN: llvm-dwarfdump --dwo dump_dwo.o | FileCheck %s
# RUN: llvm-dwarfdump --dwo --name int --name char dump_dwo.o | FileCheck %s --check-prefix=NAMES
# RUN: llvm-dwarfdump --name int --name char dump_dwo.o | FileCheck %s --check-prefix=NAMES_NO_DWO
diff --git a/llvm/test/tools/llvm-ir2vec/embeddings.ll b/llvm/test/tools/llvm-ir2vec/embeddings.ll
index 993ea86..f9aa108 100644
--- a/llvm/test/tools/llvm-ir2vec/embeddings.ll
+++ b/llvm/test/tools/llvm-ir2vec/embeddings.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT
-; RUN: llvm-ir2vec --mode=embeddings --level=func --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL
-; RUN: llvm-ir2vec --mode=embeddings --level=func --function=abc --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL-ABC
-; RUN: not llvm-ir2vec --mode=embeddings --level=func --function=def --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-DEF
-; RUN: llvm-ir2vec --mode=embeddings --level=bb --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL
-; RUN: llvm-ir2vec --mode=embeddings --level=bb --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL-ABC-REPEAT
-; RUN: llvm-ir2vec --mode=embeddings --level=inst --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-LEVEL-ABC-REPEAT
+; RUN: llvm-ir2vec embeddings --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-DEFAULT
+; RUN: llvm-ir2vec embeddings --level=func --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL
+; RUN: llvm-ir2vec embeddings --level=func --function=abc --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-FUNC-LEVEL-ABC
+; RUN: not llvm-ir2vec embeddings --level=func --function=def --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-DEF
+; RUN: llvm-ir2vec embeddings --level=bb --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL
+; RUN: llvm-ir2vec embeddings --level=bb --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-BB-LEVEL-ABC-REPEAT
+; RUN: llvm-ir2vec embeddings --level=inst --function=abc_repeat --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s | FileCheck %s -check-prefix=CHECK-INST-LEVEL-ABC-REPEAT
define dso_local noundef float @abc(i32 noundef %a, float noundef %b) #0 {
entry:
diff --git a/llvm/test/tools/llvm-ir2vec/entities.ll b/llvm/test/tools/llvm-ir2vec/entities.ll
index 57c3d6f..737044c 100644
--- a/llvm/test/tools/llvm-ir2vec/entities.ll
+++ b/llvm/test/tools/llvm-ir2vec/entities.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-ir2vec --mode=entities | FileCheck %s
+; RUN: llvm-ir2vec entities | FileCheck %s
CHECK: 92
CHECK-NEXT: Ret 0
diff --git a/llvm/test/tools/llvm-ir2vec/error-handling.ll b/llvm/test/tools/llvm-ir2vec/error-handling.ll
index c23c529..b944ea0 100644
--- a/llvm/test/tools/llvm-ir2vec/error-handling.ll
+++ b/llvm/test/tools/llvm-ir2vec/error-handling.ll
@@ -1,14 +1,7 @@
; Test error handling and input validation for llvm-ir2vec tool
-; RUN: not llvm-ir2vec --mode=embeddings %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-VOCAB
-
-; RUN: not llvm-ir2vec --mode=embeddings --function=nonexistent --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-NOT-FOUND
-
-; RUN: llvm-ir2vec --mode=triplets --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json --level=inst %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-LEVEL
-; RUN: llvm-ir2vec --mode=entities --level=inst %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-LEVEL
-
-; RUN: llvm-ir2vec --mode=triplets --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json --function=dummy %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-FUNC
-; RUN: llvm-ir2vec --mode=entities --function=dummy %s 2>&1 | FileCheck %s -check-prefix=CHECK-UNUSED-FUNC
+; RUN: not llvm-ir2vec embeddings %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-VOCAB
+; RUN: not llvm-ir2vec embeddings --function=nonexistent --ir2vec-vocab-path=%ir2vec_test_vocab_dir/dummy_3D_nonzero_opc_vocab.json %s 2>&1 | FileCheck %s -check-prefix=CHECK-FUNC-NOT-FOUND
; Simple test function for valid IR
define i32 @test_func(i32 %a) {
@@ -18,5 +11,3 @@ entry:
; CHECK-NO-VOCAB: error: IR2Vec vocabulary file path not specified; You may need to set it using --ir2vec-vocab-path
; CHECK-FUNC-NOT-FOUND: Error: Function 'nonexistent' not found
-; CHECK-UNUSED-LEVEL: Warning: --level option is ignored
-; CHECK-UNUSED-FUNC: Warning: --function option is ignored
diff --git a/llvm/test/tools/llvm-ir2vec/triplets.ll b/llvm/test/tools/llvm-ir2vec/triplets.ll
index dcd1dc9..a7fd9e4 100644
--- a/llvm/test/tools/llvm-ir2vec/triplets.ll
+++ b/llvm/test/tools/llvm-ir2vec/triplets.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-ir2vec --mode=triplets %s | FileCheck %s -check-prefix=TRIPLETS
+; RUN: llvm-ir2vec triplets %s | FileCheck %s -check-prefix=TRIPLETS
define i32 @simple_add(i32 %a, i32 %b) {
entry:
diff --git a/llvm/test/tools/llvm-libtool-darwin/L-and-l.test b/llvm/test/tools/llvm-libtool-darwin/L-and-l.test
index 43a88f5..e8a5885 100644
--- a/llvm/test/tools/llvm-libtool-darwin/L-and-l.test
+++ b/llvm/test/tools/llvm-libtool-darwin/L-and-l.test
@@ -1,112 +1,113 @@
## This test checks that -l and -L options work correctly.
-# RUN: yaml2obj %S/Inputs/input1.yaml -o %t-input1.o
-# RUN: yaml2obj %S/Inputs/input2.yaml -o %t-input2.o
+# RUN: mkdir -p %t.dir
+# RUN: yaml2obj %S/Inputs/input1.yaml -o %t.dir/input1.o
+# RUN: yaml2obj %S/Inputs/input2.yaml -o %t.dir/input2.o
## Check that the library is recognised when it ends with '.o':
-# RUN: llvm-libtool-darwin -static -o %t.lib -l%basename_t.tmp-input1.o -l%basename_t.tmp-input2.o -L%T
+# RUN: llvm-libtool-darwin -static -o %t.lib -linput1.o -linput2.o -L%t.dir
# RUN: llvm-ar t %t.lib | \
-# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
+# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}}
# RUN: llvm-nm --print-armap %t.lib | \
-# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines
+# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS --match-full-lines
-# CHECK-NAMES: [[PREFIX]]-input1.o
-# CHECK-NAMES-NEXT: [[PREFIX]]-input2.o
+# CHECK-NAMES: input1.o
+# CHECK-NAMES-NEXT: input2.o
# CHECK-SYMBOLS: Archive map
-# CHECK-SYMBOLS-NEXT: _symbol1 in [[PREFIX]]-input1.o
-# CHECK-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o
+# CHECK-SYMBOLS-NEXT: _symbol1 in input1.o
+# CHECK-SYMBOLS-NEXT: _symbol2 in input2.o
# CHECK-SYMBOLS-EMPTY:
## Check that the library is recognised when prepended with 'lib' and appended with '.a':
# RUN: rm -rf %t/dirname && mkdir -p %t/dirname
-# RUN: llvm-ar cr %t/dirname/libinput2.a %t-input2.o
+# RUN: llvm-ar cr %t/dirname/libinput2.a %t.dir/input2.o
# RUN: llvm-libtool-darwin -static -o %t.lib -linput2 -L%t/dirname
# RUN: llvm-ar t %t.lib | \
-# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
+# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}}
# RUN: llvm-nm --print-armap %t.lib | \
-# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines
+# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS --match-full-lines
-# SINGLE-NAMES: [[PREFIX]]-input2.o
+# SINGLE-NAMES: input2.o
# SINGLE-SYMBOLS: Archive map
-# SINGLE-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o
+# SINGLE-SYMBOLS-NEXT: _symbol2 in input2.o
# SINGLE-SYMBOLS-EMPTY:
## -l and -L option specified multiple times:
# RUN: rm -rf %t/otherDirname && mkdir -p %t/otherDirname
-# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t-input1.o
+# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t.dir/input1.o
# RUN: llvm-libtool-darwin -static -o %t.lib -linput2 -linput1 -L%t/dirname -L%t/otherDirname
# RUN: llvm-ar t %t.lib | \
-# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
+# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}}
# RUN: llvm-nm --print-armap %t.lib | \
-# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines
+# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS --match-full-lines
## Check it is possible to pass arguments to -l and -L separated from the option
## and the options specified multiple times:
# RUN: rm -rf %t/otherDirname && mkdir -p %t/otherDirname
-# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t-input1.o
+# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t.dir/input1.o
# RUN: llvm-libtool-darwin -static -o %t.lib -l input2 -l input1 -L %t/dirname -L %t/otherDirname
# RUN: llvm-ar t %t.lib | \
-# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
+# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}}
# RUN: llvm-nm --print-armap %t.lib | \
-# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines
+# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS --match-full-lines
-# OTHER-NAMES: [[PREFIX]]-input2.o
-# OTHER-NAMES-NEXT: [[PREFIX]]-input1.o
+# OTHER-NAMES: input2.o
+# OTHER-NAMES-NEXT: input1.o
# OTHER-SYMBOLS: Archive map
-# OTHER-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o
-# OTHER-SYMBOLS-NEXT: _symbol1 in [[PREFIX]]-input1.o
+# OTHER-SYMBOLS-NEXT: _symbol2 in input2.o
+# OTHER-SYMBOLS-NEXT: _symbol1 in input1.o
# OTHER-SYMBOLS-EMPTY:
## Check that if multiple directories specified with -L have the same named file
## in them, the file from the first directory is selected.
-# RUN: llvm-ar cr %t/otherDirname/libinput2.a %t-input1.o
+# RUN: llvm-ar cr %t/otherDirname/libinput2.a %t.dir/input1.o
# RUN: llvm-libtool-darwin -static -o %t.lib -linput2 -L%t/dirname -L%t/otherDirname
# RUN: llvm-ar t %t.lib | \
-# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
+# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}}
# RUN: llvm-nm --print-armap %t.lib | \
-# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines
+# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS --match-full-lines
## Check that if two different files with the same names are explicitly
## specified, the command gives a warning.
-# RUN: cp %t-input2.o %t/dirname
+# RUN: cp %t.dir/input2.o %t/dirname
# RUN: llvm-libtool-darwin -static -o %t.lib \
-# RUN: %t/dirname/%basename_t.tmp-input2.o %t-input2.o 2>&1 | \
+# RUN: %t/dirname/input2.o %t.dir/input2.o 2>&1 | \
# RUN: FileCheck %s --check-prefix=DUPLICATE-INPUT \
-# RUN: -DFILE=%basename_t.tmp-input2.o \
-# RUN: -DINPUTA=%t/dirname/%basename_t.tmp-input2.o \
-# RUN: -DINPUTB=%t-input2.o
+# RUN: -DFILE=input2.o \
+# RUN: -DINPUTA=%t/dirname/input2.o \
+# RUN: -DINPUTB=%t.dir/input2.o
# DUPLICATE-INPUT: warning: file '[[FILE]]' was specified multiple times.
# DUPLICATE-INPUT-DAG: [[INPUTA]]
# DUPLICATE-INPUT-DAG: [[INPUTB]]
## -l option combined with an input file:
-# RUN: llvm-libtool-darwin -static -o %t.lib %t-input1.o -linput2 -L%t/dirname
+# RUN: llvm-libtool-darwin -static -o %t.lib %t.dir/input1.o -linput2 -L%t/dirname
# RUN: llvm-ar t %t.lib | \
-# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
+# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}}
# RUN: llvm-nm --print-armap %t.lib | \
-# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines
+# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS --match-full-lines
## Specify the same file with a -l option and an input file:
# RUN: rm -rf %t/copy
# RUN: mkdir -p %t/copy
-# RUN: cp %t-input1.o %t/copy
+# RUN: cp %t.dir/input1.o %t/copy
# RUN: llvm-libtool-darwin -static -o %t.lib \
-# RUN: %t/copy/%basename_t.tmp-input1.o -l%basename_t.tmp-input1.o -L%t/copy 2>&1 | \
-# RUN: FileCheck %s --check-prefix=DUPLICATE-L-INPUT -DFILE=%basename_t.tmp-input1.o
+# RUN: %t/copy/input1.o -linput1.o -L%t/copy 2>&1 | \
+# RUN: FileCheck %s --check-prefix=DUPLICATE-L-INPUT -DFILE=input1.o
## Specify same -l option twice:
-# RUN: llvm-libtool-darwin -static -o %t.lib -l%basename_t.tmp-input1.o \
-# RUN: -l%basename_t.tmp-input1.o -L%t/copy 2>&1 | \
+# RUN: llvm-libtool-darwin -static -o %t.lib -linput1.o \
+# RUN: -linput1.o -L%t/copy 2>&1 | \
# RUN: FileCheck %s --check-prefix=DUPLICATE-L-INPUT \
-# RUN: -DFILE=%basename_t.tmp-input1.o
+# RUN: -DFILE=input1.o
# DUPLICATE-L-INPUT: warning: file '[[FILE]]' was specified multiple times.
@@ -123,11 +124,11 @@
## Check that an error is thrown when the input library cannot be found
## (since 'lib' and '.a' are added):
-# RUN: llvm-ar cr %t/dirname/file-does-exist %t-input1.o
+# RUN: llvm-ar cr %t/dirname/file-does-exist %t.dir/input1.o
# RUN: not llvm-libtool-darwin -static -o %t.lib -lfile-does-exist -L%t/dirname 2>&1 | \
# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=libfile-does-exist.a
-# RUN: llvm-ar cr %t/dirname/libfile-does-exist.a %t-input1.o
+# RUN: llvm-ar cr %t/dirname/libfile-does-exist.a %t.dir/input1.o
# RUN: not llvm-libtool-darwin -static -o %t.lib -llibfile-does-exist.a -L%t/dirname 2>&1 | \
# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=liblibfile-does-exist.a.a
@@ -145,13 +146,13 @@
# RUN: FileCheck %s --check-prefix=NOT-VALID -DFILE=libnot-valid.a
## Check that 'lib' and '.a' are not added to a file ending in '.o':
-# RUN: llvm-ar cr %t/dirname/libfoo.o.a %t-input1.o
+# RUN: llvm-ar cr %t/dirname/libfoo.o.a %t.dir/input1.o
# RUN: not llvm-libtool-darwin -static -o %t.lib -lfoo.o -L%t/dirname 2>&1 | \
# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=foo.o
## Check that 'lib' and '.a' are added to a file ending in any other extension
## beside '.o' (e.g. '.ext'):
-# RUN: llvm-ar cr %t/dirname/libbar.ext.a %t-input2.o
+# RUN: llvm-ar cr %t/dirname/libbar.ext.a %t.dir/input2.o
# RUN: llvm-libtool-darwin -static -o %t.lib -lbar.ext -L%t/dirname
# RUN: llvm-ar t %t.lib | \
# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp
diff --git a/llvm/test/tools/llvm-objcopy/COFF/dump-section.test b/llvm/test/tools/llvm-objcopy/COFF/dump-section.test
index 591135a..e96febc 100644
--- a/llvm/test/tools/llvm-objcopy/COFF/dump-section.test
+++ b/llvm/test/tools/llvm-objcopy/COFF/dump-section.test
@@ -4,7 +4,7 @@
# RUN: llvm-objcopy --dump-section .text.f=%t.txt %t.obj
# RUN: od -t x1 %t.txt | FileCheck %s --ignore-case -check-prefix CHECK-TEXT-F
# RUN: not llvm-objcopy --dump-section non-existent=/dev/null %t.obj 2>&1 | FileCheck %s -check-prefix CHECK-NO-SECTION
-# RUN: not llvm-objcopy --dump-section .text=%T %t.obj 2>&1 | FileCheck -DOBJ=%t.obj -DMSG=%errc_EISDIR %s -check-prefix CHECK-INVALID-DESTINATION
+# RUN: not llvm-objcopy --dump-section .text=%S %t.obj 2>&1 | FileCheck -DOBJ=%t.obj -DMSG=%errc_EISDIR %s -check-prefix CHECK-INVALID-DESTINATION
# CHECK-EMPTY-SIZE: 0
diff --git a/llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll b/llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll
new file mode 100644
index 0000000..d9ed9df
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-reduce %s -o %t --abort-on-invalid-reduction --delta-passes=operands-to-args --test FileCheck --test-arg %s --test-arg --check-prefix=INTERESTING --test-arg --input-file
+; RUN: FileCheck %s --input-file %t --check-prefix=REDUCED
+
+; INTERESTING: store
+; REDUCED: define void @test(ptr %a) {
+; REDUCED-NEXT: %a1 = alloca i32
+; REDUCED-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %a1)
+; REDUCED-NEXT: store i32 0, ptr %a
+; REDUCED-NEXT: store i32 1, ptr %a
+; REDUCED-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %a1)
+define void @test() {
+ %a = alloca i32
+ call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+ store i32 0, ptr %a
+ store i32 1, ptr %a
+ call void @llvm.lifetime.end.p0(i64 4, ptr %a)
+ ret void
+}